{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "8b9d6583-6ae1-4bb6-970c-c80d42dfebdb",
    "_uuid": "d56b7961b91d7f67554ec48bd1d2485fb7419120"
   },
   "source": [
    "# 1 数据集信息\n",
    "澳大利亚广播公司 ABC 发布的新闻头条数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
   },
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd \n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.feature_extraction import text\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.cluster import KMeans\n",
    "from nltk.tokenize import RegexpTokenizer\n",
    "from nltk.stem.snowball import SnowballStemmer\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# 读取数据集\n",
    "data = pd.read_csv(\"/data/course_data/abcnews-date-text/abcnews-date-text.csv\",error_bad_lines=False,usecols =[\"headline_text\"])\n",
    "data.head()\n",
    "data = data.head(10000)  # 获取部分数据快速运行，你可以尝试修改使用的数据量查看后续的建模效果，不过注意使用的数据越多后续模型训练的时间越长"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "_cell_guid": "3e44d856-a323-45ac-b7cc-80d77385060f",
    "_uuid": "a498ee778ab763e0801b8f9cf14e1d4d01f38846"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 10000 entries, 0 to 9999\n",
      "Data columns (total 1 columns):\n",
      " #   Column         Non-Null Count  Dtype \n",
      "---  ------         --------------  ----- \n",
      " 0   headline_text  10000 non-null  object\n",
      "dtypes: object(1)\n",
      "memory usage: 78.2+ KB\n"
     ]
    }
   ],
   "source": [
    "# 打印数据信息\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>headline_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>aba decides against community broadcasting lic...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>act fire witnesses must be aware of defamation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>a g calls for infrastructure protection summit</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>air nz staff in aust strike for pay rise</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>air nz strike to affect australian travellers</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       headline_text\n",
       "0  aba decides against community broadcasting lic...\n",
       "1     act fire witnesses must be aware of defamation\n",
       "2     a g calls for infrastructure protection summit\n",
       "3           air nz staff in aust strike for pay rise\n",
       "4      air nz strike to affect australian travellers"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 大概查看一下数据集\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "ff887c6d-0470-4f62-860b-9457b223bb8c",
    "_uuid": "eb590852f097f66ea53be9a970789430fc3f6a63"
   },
   "source": [
    "## 1.1 删除重复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "_cell_guid": "42392880-315c-41bf-98e8-a1cbfab72f6e",
    "_uuid": "1e0143660cbb59acf14ed07c847fd9bc3ca85045"
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>headline_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>9253</th>\n",
       "      <td>5 australian sars cases being monitored</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9458</th>\n",
       "      <td>5 australian sars cases being monitored</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6402</th>\n",
       "      <td>all walks of life follow protest path</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6192</th>\n",
       "      <td>all walks of life follow protest path</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6197</th>\n",
       "      <td>asio briefs crean on terrorist risk</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6409</th>\n",
       "      <td>asio briefs crean on terrorist risk</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6609</th>\n",
       "      <td>australia celebrates world cup victory</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6813</th>\n",
       "      <td>australia celebrates world cup victory</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                headline_text\n",
       "9253  5 australian sars cases being monitored\n",
       "9458  5 australian sars cases being monitored\n",
       "6402    all walks of life follow protest path\n",
       "6192    all walks of life follow protest path\n",
       "6197      asio briefs crean on terrorist risk\n",
       "6409      asio briefs crean on terrorist risk\n",
       "6609   australia celebrates world cup victory\n",
       "6813   australia celebrates world cup victory"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 查看重复的数据行，pandas.DataFrame.duplicated 使用方法详见：https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.duplicated.html\n",
    "data[data['headline_text'].duplicated(keep=False)].sort_values('headline_text').head(8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "_cell_guid": "9f5ff611-397e-45b7-9616-7bc33f6e81bb",
    "_uuid": "4e5e82d7c6fb0e7b14f8b5772bea14e448f88fcc"
   },
   "outputs": [],
   "source": [
    "# 删除重复行，pandas.DataFrame.drop_duplicates 使用方法详见：https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop_duplicates.html\n",
    "########## 第一题 ～ 1行 ##########\n",
    "######### your code #########\n",
    "data = data.drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "f1bb8d35-27aa-4ff4-9a39-4329517aa6a4",
    "_uuid": "f022fdf6441499ed52b34c063240f4f28b2ff3a5"
   },
   "source": [
    "# 2 数据预处理 "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "7ecba7c7-84d0-426b-aa9c-9ca05d45da75",
    "_uuid": "d2f1e1b88fb7b29fd47c249be3af044ef1e2a246"
   },
   "source": [
    "## 2.1 为向量化表示进行前处理\n",
    "进行自然语言处理时，必须将单词转换为机器学习算法可以利用的向量。如果目标是对文本数据进行机器学习建模，例如电影评论或推文或其他任何内容，则需要将文本数据转换为数字。此过程称为“嵌入”或“向量化”。  \n",
    "进行向量化时，请务必记住，它不仅仅是将单个单词变成单个数字。单词可以转换为数字，整个文档就可以转换为向量。向量的维度往往不止一个，而且对于文本数据，向量通常是高维的。这是因为特征数据的每个维度将对应一个单词，而我们所处理的文档通常包含数千个单词。\n",
    "\n",
    "## 2.2 TF-IDF\n",
    "在信息检索中，tf–idf 或 TFIDF（term frequency–inverse document frequency）是一种数值统计，旨在反映单词对语料库中文档的重要性。在信息检索，文本挖掘和用户建模的搜索中，它通常用作加权因子。 tf-idf 值与单词在文档中出现的次数成正比，同时被单词在语料库中的出现频率所抵消，这有助于调整某些单词通常会更频繁出现的事实。 如今，tf-idf是最流行的术语加权方案之一。在数字图书馆领域，有83％的基于文本的推荐系统使用tf-idf。\n",
    "\n",
    "搜索引擎经常使用tf–idf加权方案的变体作为在给定用户查询时对文档相关性进行评分和排名的主要工具。tf–idf可成功用于各种领域的停用词过滤，包括文本摘要和分类。\n",
    "\n",
    "排名函数中最简单的是通过将每个查询词的tf–idf相加得出，许多更复杂的排名函数是此简单模型的变体。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "_cell_guid": "c7e595ab-440c-4ad7-98e4-4358cc724d8c",
    "_uuid": "9c1c23ecabae8217a9aa8f90371f2a30053cc6f1"
   },
   "outputs": [],
   "source": [
    "punc = ['.', ',', '\"', \"'\", '?', '!', ':', ';', '(', ')', '[', ']', '{', '}',\"%\"]\n",
    "stop_words = text.ENGLISH_STOP_WORDS.union(punc)\n",
    "desc = data['headline_text'].values\n",
    "############ 第二题 ～ 1行 ############\n",
    "############ your code start ############\n",
    "# TfidfVectorizer 使用方法详见：http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html\n",
    "vectorizer = TfidfVectorizer()\n",
    "############ your code end ############\n",
    "X = vectorizer.fit_transform(desc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "_cell_guid": "eb56971e-5412-4138-a4cd-0e14844796be",
    "_uuid": "10af64e15e2f08c30da71b847432eedc2aece199"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10034\n",
      "['kirsten', 'kirwan', 'kit', 'kits', 'kitty', 'kivilev', 'kivilevs', 'kiwi', 'kiwis', 'klim', 'klims', 'klitschko', 'klusener', 'km', 'kms', 'knee', 'knife', 'knights', 'knives', 'knock', 'knocked', 'knocking', 'knopfler', 'know', 'knowles', 'known', 'koala', 'koalas', 'koen', 'kong', 'kookaburras', 'korea', 'korean', 'koreans', 'koreas', 'kosciuszko', 'kosmina', 'kournikova', 'kph', 'krakow', 'kravitz', 'krige', 'kroger', 'kucera', 'kuerten', 'kununurra', 'kuranyi', 'kurd', 'kurdish', 'kurds', 'kuwait', 'kuwaiti', 'kwan', 'kyi', 'kylie', 'kyoto', 'kyrgyzstan', 'la', 'label', 'labels', 'labor', 'laboratories', 'labors', 'labour', 'lack', 'laden', 'ladens', 'ladies', 'lag', 'lagoon', 'lagoons', 'lags', 'laid', 'lake', 'lakers', 'lakes', 'lambie', 'laments', 'land', 'landfill', 'landholder', 'landholders', 'landing', 'landmine', 'landmines', 'lands', 'landslide', 'langer', 'langmack', 'language', 'lanka', 'lankan', 'lankans', 'lara', 'large', 'larger', 'larkham', 'lash', 'lashes', 'lashings']\n"
     ]
    }
   ],
   "source": [
    "word_features = vectorizer.get_feature_names()\n",
    "print(len(word_features))\n",
    "print(word_features[5000:5100])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "871b1bd6-c411-4ff6-a784-9b376a0db4e6",
    "_uuid": "e480f5b88938660f05c09f75af5f9f58d7110096"
   },
   "source": [
    "## 2.3 Stemming\n",
    "stemming 是将单词还原为词干（即词根形式）的过程。 词根形式不一定是单词本身，而是可以通过连接正确的后缀来生成单词。 例如，“fish”，“fishes”和“fishing”这几个词的词干都是“fish”，这是一个正确的单词。 另一方面，“study”，“studies”和“studying”一词源于“studi”，这不是一个正确的英语单词。\n",
    "\n",
    "## 2.4 Tokenizing\n",
    "Tokenization 将句子分解为单词和标点符号"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "_cell_guid": "536a1a88-48a3-43d0-b368-ccf31947e5b1",
    "_uuid": "5d25104db183624b990a1d64e10cc618fd8ee715"
   },
   "outputs": [],
   "source": [
    "stemmer = SnowballStemmer('english')  # SnowballStemmer 使用方法详见： https://www.kite.com/python/docs/nltk.SnowballStemmer\n",
    "tokenizer = RegexpTokenizer(r'[a-zA-Z\\']+')  #  RegexpTokenizer 使用方法详见： https://www.kite.com/python/docs/nltk.RegexpTokenizer\n",
    "\n",
    "def tokenize(text):\n",
    "    \"\"\"先进行 stemming 然后 tokenize\n",
    "    params:\n",
    "    text: 一个句子\n",
    "    \n",
    "    return:\n",
    "    tokens 列表\n",
    "    \"\"\"\n",
    "    ############ 第三题 ～ 1行 （使用列表推导） ############ \n",
    "    ############ your code start ############\n",
    "    return tokenizer.tokenize(\" \".join([stemmer.stem(i) for i in text.split()]))\n",
    "    ############ your code end ############"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "9877cf33-ebe8-46e4-b26e-c6f673617517",
    "_uuid": "d190c4d6b9cb52ad70a021e3475704538b47f85f"
   },
   "source": [
    "## 2.5  使用停用词、stemming 和自定义的 tokenizing 进行 TFIDF 向量化"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "_cell_guid": "18e1d30c-5515-4c0d-89e4-6af99658bee3",
    "_uuid": "5fcc93fa3093f30d181ffa38b4ca46b133316952",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.7/site-packages/sklearn/feature_extraction/text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['abov', 'afterward', 'alon', 'alreadi', 'alway', 'ani', 'anoth', 'anyon', 'anyth', 'anywher', 'becam', 'becaus', 'becom', 'befor', 'besid', 'cri', 'describ', 'dure', 'els', 'elsewher', 'empti', 'everi', 'everyon', 'everyth', 'everywher', 'fifti', 'forti', 'henc', 'hereaft', 'herebi', 'howev', 'hundr', 'inde', 'mani', 'meanwhil', 'moreov', 'nobodi', 'noon', 'noth', 'nowher', 'onc', 'onli', 'otherwis', 'ourselv', 'perhap', 'pleas', 'sever', 'sinc', 'sincer', 'sixti', 'someon', 'someth', 'sometim', 'somewher', 'themselv', 'thenc', 'thereaft', 'therebi', 'therefor', 'togeth', 'twelv', 'twenti', 'veri', 'whatev', 'whenc', 'whenev', 'wherea', 'whereaft', 'wherebi', 'wherev', 'whi', 'yourselv'] not in stop_words.\n",
      "  'stop_words.' % sorted(inconsistent))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6909\n",
      "['aa', 'aac', 'ab', 'aba', 'abalon', 'abandon', 'abar', 'abattoir', 'abbott', 'abc', 'abdic', 'abduct', 'abid', 'abigroup', 'abil', 'abl', 'ablaz', 'aboard', 'abolit', 'aborigin', 'abort', 'abov', 'abreast', 'abroad', 'absenc', 'absent', 'abu', 'abund', 'abus', 'abysm', 'ac', 'aca', 'academ', 'academi', 'acapulco', 'acb', 'accc', 'accent', 'accept', 'access', 'accid', 'accident', 'accommod', 'account', 'accredit', 'accus', 'ace', 'aceh', 'achiev', 'acid']\n"
     ]
    }
   ],
   "source": [
    "vectorizer2 = TfidfVectorizer(stop_words = stop_words, tokenizer = tokenize)\n",
    "X2 = vectorizer2.fit_transform(desc)\n",
    "word_features2 = vectorizer2.get_feature_names()\n",
    "print(len(word_features2))\n",
    "print(word_features2[:50]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "_cell_guid": "acdc11f4-7b5c-4aee-8c42-1a752bffbc2e",
    "_uuid": "244a3015e5b4f4f84c174586fa875f5cf49cff1d"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.7/site-packages/sklearn/feature_extraction/text.py:385: UserWarning: Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens ['abov', 'afterward', 'alon', 'alreadi', 'alway', 'ani', 'anoth', 'anyon', 'anyth', 'anywher', 'becam', 'becaus', 'becom', 'befor', 'besid', 'cri', 'describ', 'dure', 'els', 'elsewher', 'empti', 'everi', 'everyon', 'everyth', 'everywher', 'fifti', 'forti', 'henc', 'hereaft', 'herebi', 'howev', 'hundr', 'inde', 'mani', 'meanwhil', 'moreov', 'nobodi', 'noon', 'noth', 'nowher', 'onc', 'onli', 'otherwis', 'ourselv', 'perhap', 'pleas', 'sever', 'sinc', 'sincer', 'sixti', 'someon', 'someth', 'sometim', 'somewher', 'themselv', 'thenc', 'thereaft', 'therebi', 'therefor', 'togeth', 'twelv', 'twenti', 'veri', 'whatev', 'whenc', 'whenev', 'wherea', 'whereaft', 'wherebi', 'wherev', 'whi', 'yourselv'] not in stop_words.\n",
      "  'stop_words.' % sorted(inconsistent))\n"
     ]
    }
   ],
   "source": [
    "vectorizer3 = TfidfVectorizer(stop_words = stop_words, tokenizer = tokenize, max_features = 1000)\n",
    "X3 = vectorizer3.fit_transform(desc)\n",
    "words = vectorizer3.get_feature_names()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "e8a8c1ed-8970-49e0-bae5-0e0d85abea84",
    "_uuid": "b5f2e66c25d17527b78ae1a1fe174f1ac6310286"
   },
   "source": [
    "# 3 K-Means 聚类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "c9a1312e-45f7-44e4-9407-13012bdf97ce",
    "_uuid": "39ba3fa0f53454111495da2f7e7719572afec93a"
   },
   "source": [
    "## 3.1 使用手肘法选择聚类簇的数量\n",
    "随着聚类数k的增大,样本划分会更加的精细,每个簇的聚合程度会逐渐提高,那么误差平方和SSE自然会逐渐变小,并且当k小于真实的簇类数时,由于k的增大会大幅增加每个簇的聚合程度,因此SSE的下降幅度会很大,而当k到达真实聚类数时,再增加k所得到的聚合程度回报会迅速变小,所以SSE的下降幅度会骤减,然后随着k值的继续增大而趋于平缓,也就是说SSE和k的关系类似于手肘的形状,而这个肘部对应的k值就是数据的真实聚类数.因此这种方法被称为手肘法."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "_cell_guid": "c72b26ab-4bef-44e6-b854-5bca3cd1f217",
    "_uuid": "992bea80b2647c4f4e564bb020ce8eab07db6b78",
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd3hVVfb/8fcnCR2CQAJSDQhIUxFCUSCI4IiIYhnHjmPDggqiM/Nzqk6zF7AOgiIWxBFHRCk2DEVaaNKlKghCAJEeCKzfH+fw9Yoxl5LLTcJ6Pc99cu8+bZ2rZGWXs7fMDOeccy4/CfEOwDnnXOHnycI551xUniycc85F5cnCOedcVJ4snHPOReXJwjnnXFSeLFyRJOkBSa8fg+ukSTJJSeHnzyXdHOvrHgsFeS+Shkj6Z0GcyxVOnixcoSRpe8Rrv6RdEZ+vKeBrDZG056Brzi3IaxypiGQ166DylDDmVYd4nmOSXF3x5cnCFUpmVv7AC/gGuDCi7I0YXPLRyGua2ekxuMbRKCepWcTnq4GV8QrGHX88WbiirKSkoZK2SVogKf3ABkk1JI2QlC1ppaS7C/C6J0uaLukHSSMlVY647kVhLFvCZp7GYfkNkkZF7LdM0tsRn1dLap7PNV8Dro/43BMYGrnDL92zpK7AH4Er8qg1nSRpcvgdfiQpJdq9hNvOkDQrPG44UPrQvjpXVHmycEXZRcBbwAnA+8CzAJISgFHAXKAm0BnoK+m8ArpuT+BGoAaQCwwIr9sQGAb0BVKB0cAoSSWBTKCDpARJ1YESQLvwuHpAeeDLfK75OnClpMTwl3YFYNqBjfnds5mNBf4NDM+j1nQ1cANQFSgJ3BftXsL7eY8ggVUG/gtcdljfoCtyPFm4omySmY02s30Ev7gO/BJsBaSa2d/NbI+ZrQBeAq7M51z3hX9BH3i9ms++r5nZfDPbAfwF+I2kROAK4EMz+9jM9gKPA2WAs8IYtgHNgY7AOOBbSY3CzxPNbH8+11wDLAG6ENQwhh60/UjuGeAVM/vKzHYBb4fxkd+9AG0Jkt3TZrbXzN4BZkS5jivikuIdgHNH4buI9zuB0uGopZOAGpK2RGxPBCbmc67HzezPh3jd1RHvvyb4xZlCUNP4+sAGM9svaTXBX/oQ1C7OBuqH77cQJIozw8/RDAV+S/ALOwNoELHtSO4Zfv4dlg/f53cv+4Bv7aezkH6NK9Y8WbjiaDWw0swaRN3zyNSOeF8H2AtsBNYCpx7YIEnhvt+GRZnAhUBdgmahLcA1BMni2UO47ohwv5lm9rWkyPuLds+HO710fvdiQE1JikgYdYDlh3kNV4R4M5QrjqYDWyX9QVKZsJ2/maRWBXT+ayU1kVQW+DvwTtgU9jZwgaTOkkoA9wI5wBfhcZlAJ6CMma0h+Ku/K1AFmB3tomGz1zlAXs9GRLvn9UBa2LdxKPK7lykEfTV3S0qSdCnQ+hDP64ooTxau2Al/cV9I0P6+kuCv/kFAxXwO+/1Bz1lszGff14AhBE04pYG7w+suAa4FngmveSHBkN894favgO2ETUNmthVYAUwOYz6Ue8sys5/9BX8I9/zf8Oemg5/Z+IXr/OK9hPdzKUGT2PcE/RvvHkr8ruiSL37knHMuGq9ZOOeci8qThXPOuag8WTjnnIvKk4VzzrmoYvqchaQ+wC2AgJfM7Olw/psXCUaR5AJ3mNn0cP/7gZsIHvq528zGheUtCUaflCGYdqCPRemZT0lJsbS0tFjclnPOFVszZ87caGapB5fHLFmEM2TeQjD+eg8wVtKHwKPAg2Y2RlK38PPZkpoQTE3QlODp0U8kNQyHBL4A9AKmEiSLrsCY/K6flpZGVlZWbG7OOeeKKUl5Po0fy2aoxsBUM9tpZrkEDyRdQvD0Z3K4T0WCJ0UBegBvmVmOma0ElgGtw0nXks1sSlibGApcHMO4nXPOHSSWzVDzgX9JqgLsAroBWQSzWI6T9DhBsjor3L8mQc3hgDVh2d7w/cHlzjnnjpGY1SzMbBHwCPAxMJZg6uRc4HbgHjOrDdwDDA4PUV6nyaf8ZyT1kpQlKSs7O/so78A559wBMR0NZWaDzayFmWUAm4GlBNMrH5ga4L/8OKfMGn46QVstgiaqNeH7g8vzut5AM0s3s/TU1J/1zzjnnDtCMU0WkqqGP+sQzCUzjOAXfcdwl3MIEggEi9dcKamUpLoE0y9PN7N1wDZJbcOZL3sCI2MZt3POuZ+K9RTlI8I+i71AbzP7XtItQP9w3YHdBKOcMLMF4TKTCwmaq3pHTK52Oz8OnR1DlJFQzjnnClaxnUgwPT3dfOisc84dHkkzzSz94HJ/gvsgw6Z/Q+ZX3jnunHORPFlE2JO7n9emfM2tr2UxY9XmeIfjnHOFhieLCCWTEhh6U2tqVCzDja/MYP63P8Q7JOecKxQ8WRwkpXwpXru5DRVKJ3H9y9NZnr093iE551zcebLIQ80TyvD6zW0AuG7QNL7dsivOETnnXHx5svgF9VLLM/Sm1mzLyeXaQdPI3pYT75Cccy5uPFnko2mNigy5oRXf/bCb6wZP44ede+MdknPOxYUniyhanlSZgT1bsiJ7BzcMmc6OnNx4h+Scc8ecJ4tD0KFBKgOuas6c1Vu49bWZ5OTui36Qc84VI54sDlHXZtV55LLTmLRsI3cPm03uvv3xDsk5544ZTxaH4fL02vy1exPGLVjPH0bMY//+4jlVinPOHSzWEwkWOze2r8u23bk89clXVCidxN8ubEIwGa5zzhVfniyOwN2d67N1914GT1pJcukk+v3qlHiH5JxzMeXJ4ghI4s8XNGb77lwGfLaMCqVLcEtGvXiH5ZxzMePJ4ghJ4t+Xnsr2nFz+NXoRFUoncWXrOvEOyznnYsKTxVFITBBPXdGc7Tm53P+/eZQvnUT302rEOyznnCtwPhrqKJVMSuDFa1uSflIl7hk+h/FLNsQ7JOecK3CeLApAmZKJDP5tKxpWq8Dtr89k+kpfC8M5V7x4siggyaVL8OqNralxQhluHDKDeWt8LQznXPER02QhqY+k+ZIWSOoblg2XNCd8rZI0JyxPk7QrYtuLEedpKWmepGWSBqiQPtiQUr4Ub9zchoplSnD9K9NZtmFbvENyzrkCEbNkIakZcAvQGjgd6C6pgZldYWbNzaw5MAJ4N+Kw5Qe2mdltEeUvAL2ABuGra6ziPlrVKwZrYSRIXDtoOqs374x3SM45d9RiWbNoDEw1s51mlgtkApcc2BjWDn4DDMvvJJKqA8lmNsXMDBgKXBy7sI9e3ZRyvHZTa3buyeXawdPYsHV3vENyzrmjEstkMR/IkFRFUlmgG1A7YnsHYL2ZLY0oqytptqRMSR3CsprAmoh91oRlPyOpl6QsSVnZ2dkFdydHoHH1ZIbc2JrsbTlcN3g6W3buiWs8zjl3NGKWLMxsEfAI8DEwFpgLRC4GcRU/rVWsA+qY2RlAP+BNSclAXv0Tec7gZ2YDzSzdzNJTU1ML4C6OTos6lRh4XTorN+7gt6/MYLuvheGcK6Ji2sFtZoPNrIWZZQCbgaUAkpKAS4HhEfvmmNmm8P1MYDnQkKAmUSvitLWAtbGMuyC1b5DCgKvOYN63P9BraBa79/paGM65oifWo6Gqhj/rECSHAzWJLsBiM1sTsW+qpMTwfT2CjuwVZrYO2CapbdjP0RMYGcu4C1rXZify6GWn8cXyTdw1bDZ7fS0M51wRE+vnLEZIWgiMAnqb2fdh+ZX8vGM7A/hS0lzgHeA2MzvwdNvtwCBgGUGNY0yM4y5wl7WsxYMXNeXjhev5/Ttf+loYzrkiJaZzQ5lZh18o/20eZSMIhtLmtX8W0KxAg4uD689KY9vuvTz+UbAWxoMXNfW1MJxzRYJPJHiM9e5Un627cxk4YQXJpUtw33m+FoZzrvDzZHGMSeL+8xuxdddenh2/jAqlk7i148nxDss55/LlySIOJPGvS05lW04uD41ZTIXSJbi6ja+F4ZwrvDxZxElignjqN83ZkZPLn94L1sK46HRfC8M5Vzj5rLNxVDIpgReuaUmrtMr0Gz6Hzxavj3dIzjmXJ08WcVamZCKDr0+ncfVkbn99FlNXbIp3SM459zOeLAqBCuFaGLUrl+XmV7P4cs2WeIfknHM/4cmikKhcriSv39SGE8qW4PqXp7N0va+F4ZwrPDxZFCInVizN6ze1ISkxgWsGTfOE4ZwrNDxZFDJp4VoYe/ft54IBk3jioyU++aBzLu48WRRCjU5MZtw9GVxwWnWe+WwZ5z6VyfjFG+IdlnPuOObJopCqWqE0T13RnDdvaUPJxARuGDKD216bydotu+IdmnPuOOTJopA76+QUxvTJ4HfnncL4JRvo8mQmL01Y4dOcO+eOKU8WRUDJpAR6d6rPJ/06cma9Kvxr9CIufGYSWas2Rz/YOecKgCeLIqR25bIMuj6d/1zXkq279vLrF6fw+3fmsnmHr+/tnIstTxZFjCTOa3oin9zbkVs71uPdWd9yzhOfM3zGN76gknMuZjxZFFFlSyZx//mN+fDuDjSsWoE/jJjH5f+ZwqJ1W+MdmnOuGPJkUcSdcmIFht/alscvP52VG3fQ/ZlJ/OODhWzPyY13aM65YsSTRTEgiV+3rMVn93bkN+m1GTxpJV2eyGT0vHWYedOUc+7oxTRZSOojab6kBZL6hmXDJc0JX6skzYnY/35JyyQtkXReRHlLSfPCbQPkC1fn6YSyJXno0lN5946zqFSuJHe8MYvfvjKDrzftiHdozrkiLmbJQlIz4BagNXA60F1SAzO7wsyam1lzYATwbrh/E+BKoCnQFXheUmJ4uheAXkCD8NU1VnEXBy3qVGLUne34a/cmzPz6e859agL9P1lKTq5PG+KcOzKxrFk0Bqaa2U4zywUygUsObAxrB78BhoVFPYC3zCzHzFYCy4DWkqoDyWY2xYI2laHAxTGMu1hISkzgxvZ1+fTejvyqSTWe+uQruj49kUlLN8Y7NOdcERTLZDEfyJBURVJZoBtQO2J7B2C9mS0NP9cEVkdsXxOW1QzfH1z+M5J6ScqSlJWdnV1At1G0VUsuzbNXt2Doja0xM64dPI27hs1m/dbd8Q7NOVeExCxZmNki4BHgY2AsMBeIHKJzFT/WKgDy6oewfMrzuuZAM0s3s/TU1NQjiru4ymiYyti+GfTt0oBxC76j8xOZvDJ5Jbk+bYhz7hDEtIPbzAabWQszywA2A0sBJCUBlwLDI3Zfw09rHrWAtWF5rTzK3WEqXSKRvl0a8lHfDFqcVIkHRy2kx3OTmbPaV+ZzzuUv1qOhqoY/6xAkhwM1iS7AYjOLbF56H7hSUilJdQk6sqeb2Tpgm6S2YT9HT2BkLOMu7tJSyvHqDa147uoWbNyewyXPT+aP/5vHDzv3xjs051whlRTj84+QVAXYC/Q2s+/D8iv5aRMUZrZA0tvAQoLmqt5mdmD4zu3AEKAMMCZ8uaMgiQtOq05GwxSe/mQpQ75Yxbj533F/t8Zc1qImPjrZORdJxfWhrfT0dMvKyop3GEXGwrVb+fN785j1zRZa163MPy9uRsNqFeIdlnPuGJM008zSDy73J7gdAE1qJPPObWfx8KWn8tX6bXTrP5GHxyxm5x6fNsQ558nCRUhIEFe2rsOn/TpyyRk1eTFzOec+OYHpK33dDOeOd54s3M9UKV+Kxy4/nf/ediYlkxK46qWpDJq4wueZcu445snC/aJWaZV5/852dGlclX9+uIjeb87y2WydO055snD5qlC6BC9e25L7z2/E2Pnf0ePZSSzbsC3eYTnnjjFPFi4qSdza8WTeuLktP+zay0XPTuaDL/25SOeOJ54s3CE78+QqfHBXBxpXT+bON2fz91EL2evThTh3XPBk4Q7LiRVLM+yWtvz2rDRenrySqwZO9UkJnTsOeLJwh61kUgIPXNSU/lc2Z8HarVwwYBJTV2yKd1jOuRjyZOGOWI/mNRl5ZzuSSydxzaBpvDTBh9c6V1x5snBHpWG1Coy8sx3nNq7Gv0Yv4o43ZrFtt09I6Fxx48nCHbUKpUvwwrUt+FO3xny0cD09npvMV+t9eK1zxYknC1cgJHFLRj3euLkNW3flcvFzkxk114fXOldceLJwBaptvSp8eHd7mlRP5q5hs3lw1AL25PrwWueKOk8WrsBVSy7NsF5tubFdXV6ZvIqrXvLhtc4VdZ4sXEyUSEzgrxc24ZmrzmDROh9e61xR58nCxdSFp9dgZO92VCwTDK8dOGG5D691rgjyZOFirkG1Coy8sz3nNa3Gv0cv5vbXfXitc0WNJwt3TJQvlcRzV7fgzxc05uNF6+nxrA+vda4oiWmykNRH0nxJCyT1jSi/S9KSsPzRsCxN0i5Jc8LXixH7t5Q0T9IySQMkKZZxu9iQxM0d6vHmzW3YlpNLj2cnM3LOt/EOyzl3CGKWLCQ1A24BWgOnA90lNZDUCegBnGZmTYHHIw5bbmbNw9dtEeUvAL2ABuGra6zidrHXpl4VPryrPc1qJtPnrTk88L4Pr3WusItlzaIxMNXMdppZLpAJXALcDjxsZjkAZrYhv5NIqg4km9kUC3pGhwIXxzBudwxUTS7Nm7e05ab2dRnyRTC89rsffHitc4VVLJPFfCBDUhVJZYFuQG2gIdBB0jRJmZJaRRxTV9LssLxDWFYTWBOxz5qw7Gck9ZKUJSkrOzu74O/IFagSiQn8pXsTnr06GF7b/ZmJfLF8Y7zDcs7lIWbJwswWAY8AHwNjgblALpAEVALaAr8D3g77INYBdczsDKAf8KakZCCv/ok8x16a2UAzSzez9NTU1IK+JRcj3U+rwft3tqNimRJcO2gaL2b68FrnCpuYdnCb2WAza2FmGcBmYClBzeBdC0wH9gMpZpZjZpvC42YCywlqIWuAWhGnrQX4pEPFTP2qwfDa85tV5+Exi7nt9Zls9eG1zhUasR4NVTX8WQe4FBgGvAecE5Y3BEoCGyWlSkoMy+sRdGSvMLN1wDZJbcMaSE9gZCzjdvFRvlQSz159Bn/p3oRPF22gx7OTWfKdD691rjCI9XMWIyQtBEYBvc3se+BloJ6k+cBbwPVhx3UG8KWkucA7wG1mtjk8z+3AIGAZQY1jTIzjdnEiiZva12VYr7Zszwlmr/Xhtc7Fn4pr23B6erplZWXFOwx3FDZs3c2db85m+qrN9DzzJP7YrTGlSyTGOyznijVJM80s/eByf4LbFVpVk0vzxi1tuLl9XYZO+Zpu/SeStWpz9AOdcwXOk4Ur1EokJvDn7k144+Y27Nm3n8v/M4UH3l/Azj258Q7NueOKJwtXJLSrn8K4vhlcf2Yar05ZxXlPT2DyMn8mw7ljxZOFKzLKlUrigYua8vatZ1IiIYFrBk3j/nfn+RBb544BTxauyGmVVpnRfTpwa8d6DJ/xDec9NYHxi/OdNcY5d5TyTRaSWkk6MeJzT0kjw5lfK8c+POfyVrpEIvef35j/3dGOCqWTuGHIDPq9PYctO/fEOzTniqVoNYv/AHsAJGUADxNM5PcDMDC2oTkX3em1T2DUXe25u3MD3p+zli5PTmDs/O/iHZZzxU60ZJEY8WDcFcBAMxthZn8B6sc2NOcOTamkRPqd25CRd7ajWnIpbnt9Jr3fnMXG7TnxDs25YiNqspCUFL7vDHwWsS0pj/2di5umNSryXu92/O68U/h4wXrOfTKTkXO+9UkJnSsA0ZLFMCBT0khgFzARQFJ9gqYo5wqVEokJ9O5Un9F92pOWUo4+b83hlqEzWb/V18pw7mhEne5DUlugOvCRme0IyxoC5c1sVuxDPDI+3Yfbt994ZfJKHv9oSbB2xgVNuDy9Fr4qr3O/7Iim+wgXLZppZv8zsx2STpF0D9CsMCcK5wASE4I1v8f2yaBJ9WR+P+JLer48nTXf74x3aM4VOdGaocYCafB/TU9TgHpAb0kPxTY05wpGWko5ht3Sln9c3IxZX3/PeU9N4LUpq9i/3/synDtU0ZJFJTNbGr6/HhhmZncB5wPdYxqZcwUoIUFc1/Ykxt2TQYuTKvGXkQu46qWprNq4I96hOVckREsWkX96nUOwRCpmtodghTvnipRalcoy9MbWPPrr01i4bitd+09g0MQV7PNahnP5ipYsvpT0eNhPUR/4CEDSCTGPzLkYkcRv0mvzSb+OtK+fwj8/XMSvX/yCZRt8VT7nfkm0ZHELsJGg3+JXZnagZ7AJ8HgM43Iu5qoll+alnun0v7I5qzbuoFv/STw3fhl793ml2bmDRUsW5YFRZtbHzOZGlG8l6Px2rkiTRI/mNfm4X0fObVKNx8Yt4eLnJrNgrT9G5FykaMniGSAlj/KaQP+CD8e5+EgpX4rnrmnBi9e2YP3WHHo8O5knP1pCTu6+eIfmXKEQLVmcamaZBxea2TjgtGgnl9RH0nxJCyT1jSi/S9KSsPzRiPL7JS0Lt50XUd5S0rxw2wD5U1UuRro2q84n/TK4qHkNBny2jAufmcSc1VviHZZzcRctWZQ4wm1IakbQ59EaOB3oLqmBpE5AD+A0M2tK2PchqQlwJdAU6Ao8LykxPN0LQC+gQfjqGiVu547YCWVL8uRvmvPKb1uxbXculz4/mYdGL2L3Xq9luONXtGSxVFK3gwslnQ+siHJsY2Cqme00s1wgE7gEuB142MxyAMzswKo1PYC3zCzHzFYCy4DWkqoDyWY2xYK5SYYCFx/i/Tl3xDo1qsq4ezK4olUd/jNhBef3n8iMVZujH+hcMRQtWfQFnpY0JGw6ukvSqwT9FX2iHDsfyJBUJZw2pBtQG2gIdJA0TVKmpFbh/jWB1RHHrwnLaobvDy7/GUm9JGVJysrOzo4SnnPRJZcuwUOXnsobN7dh7779/OY/U3jyoyXk+ogpd5yJliwuAG4CJgMnha9Mgiakr/I70MwWAY8QPMg3FpgL5BJMbV4JaAv8Dng77IPIqx/C8inP65oDzSzdzNJTU1Oj3Jpzh65d/RTG9c3g1y1qMeCzZVwxcKrPMeWOK9GSRS2CX/iPAukEq+atB8oeysnNbLCZtTCzDGAzsJSgZvCuBaYTPAmeEpbXPujaa8PyWnmUO3dMlSuVxGOXn07/K5vz1XfbOL//RD78cl28w3LumMg3WZjZfWZ2FlAN+CPBL/wbgfmSFkY7uaSq4c86wKUE62O8RzB1yIGpzksSPPj3PnClpFKS6hJ0ZE83s3XANkltwxpIT2DkkdyscwWhR/OafHh3B05OLU/vN2fx/0Z8yc49ufEOy7mYOtTV7soAyUDF8LUWmHcIx42QVAXYC/Q2s+8lvQy8LGk+QU3l+rDjeoGkt4GFBM1Vvc3swPCT24EhYRxjwpdzcVOnSln+e9uZPPXxV7yQuZwZqzbzzFUtaFIjOd6hORcT+S5+JGkgwVDWbcA0YCrBCKfvj014R84XP3LHyhfLNtJ3+By27NzLH7s14vqz0nyBJVdkHdHiR0AdoBTwHfAtQf+BP6HkXISz6qcwtm8GHRqk8MCohdz8ahabtufEOyznClS0PouuQCt+nDTwXmCGpI8kPRjr4JwrKiqXK8mg69N54MImTFy6kfP7T+SLZRvjHZZzBSZazYJw1NJ8YDRBX8Fk4GSiP2fh3HFFEr9tV5f3erejQukkrhk8jUfHLvZZbF2xEG0N7rslvSVpNTCBYHW8JQQjmyofg/icK3Ka1Ehm1F3tuSK9Ns9/vpzLX5zCN5v8mQxXtEWrWaQB7wCtzayemV1nZs+b2Vwz8z+XnPsFZUsm8fBlp/Hc1S1Ynr2dbgMmMnLOt/EOy7kjFq3Pop+ZvRM+6+CcO0wXnFadMX06cMqJFejz1hzu++9cduT4Mxmu6InaZ+GcOzq1KpVleK+23H1OfUbMWkP3ZyYx/1tfXMkVLZ4snDsGkhIT6PerU3jz5rbs2rOPS56fzKCJK9i//5efc3KuMPFk4dwxdObJVRjTpwOdTqnKPz9cxA1DZpC9zZ/JcIWfJwvnjrFK5Uryn+ta8o+LmzFlxSbO7z+RCV/5lPqucPNk4VwcSOK6tifx/p3tqFyuBD1fns5DoxexJ9cHGbrCyZOFc3HU6MRkRvZuzzVtgtX4fv3iF6zcuCPeYTn3M54snIuzMiUT+dclp/LitS35etNOug+YyLuz1kQ/0LljyJOFc4VE12YnMqZPB5rWqEi/t+dyz/A5bNu9N95hOQd4snCuUKlxQhmG9WrLPV0aMnLOt3R/ZhJzV/tEzy7+PFk4V8gkJog+XRow/NYzyd1nXPbCF7yYudyfyXBx5cnCuUKqVVplRt/dgXObVOPhMYu5/pXpbNi6O95hueOUJwvnCrGKZUvw/DUteOjSU5mxajPn95/I+MUb4h2WOw7FNFlI6iNpvqQFkvqGZQ9I+lbSnPDVLSxPk7QrovzFiPO0lDRP0jJJA+RrVrrjiCSual2HUXe2J7VCKW4YMoO/jpzPVu/8dsdQzJKFpGbALUBr4HSgu6QG4eanzKx5+BodcdjyiPLbIspfAHoBDcJX11jF7Vxh1aBaBd7r3Y4b2qXx2tSv6fxEJu/N/hYz78twsRfLmkVjYKqZ7TSzXCATuORwTyKpOpBsZlMs+FcxFLi4YEN1rmgoXSKRv13YlPd7t6fGCWXoO3wOVw6cylfrt8U7NFfMxTJZzAcyJFWRVBboBtQOt90p6UtJL0uqFHFMXUmzJWVK6hCW1QQin1BaE5Y5d9w6tVZF/nf7Wfz7klNZ/N02uvWfyL9HL/K1MlzMxCxZmNki4BHgY2AsMBfIJWhSOhloDqwDnggPWQfUMbMzgH7Am5KSgbz6J/Ksd0vqJSlLUlZ2tk/M5oq3hARxdZs6jL/vbC5rUYuBE1bQ+YlMPvxynTdNuQIX0w5uMxtsZi3MLAPYDCw1s/Vmti9clvUlgj4NzCzHzDaF72cCy4GGBDWJWhGnrQWs/YXrDTSzdDNLT01Njd2NOVeIVC5Xkkd+fRrv3nEWVcqXpPebs+j58nSWZ2+Pd2iuGIn1aKiq4c86wKXAsLAP4oBLCJqrkJQqKTF8X4+gI3tFuKTrNkltw1FQPYGRsYzbuaKoRZ1KvH9nex68qClzVm+h69MTeGzcYnbt2Rfv0FwxkBTj84+QVHmPDfYAABLZSURBVAXYC/Q2s+8lvSapOUFT0irg1nDfDODvknKBfcBtZrY53HY7MAQoA4wJX865gyQmiOvPSqPbqdV5aPQinhu/nPdmr+VvFzbh3CbV8FHn7kipuLZtpqenW1ZWVrzDcC6upq3YxF9HLmDJ+m10OiWVBy9qRp0qZeMdlivEJM00s/SDy/0JbueKsTb1qvDB3e358wWNmb5yM12eyuTpT75i915vmnKHx5OFc8VcicQEbu5Qj0/vPZtfNanG058s5bynJzB+iU8b4g6dJwvnjhMnVizNs1e34I2b25CYIG54ZQa9hmax5vud8Q7NFQGeLJw7zrSrn8LYPhn8vuspTFy6kS5PZvLc+GW+/rfLlycL545DJZMSuOPs+nxyb0c6NkzlsXFL6Np/ApOXbYx3aK6Q8mTh3HGs5gll+M916bxyQyv27TeuGTSNO9+cxXc/+LoZ7qc8WTjn6HRKVcb1zeCeLg35aOF6Oj/xOS9NWMHefd405QKeLJxzQDCjbZ8uDfjkno60qVeFf41eRPcBk5i2YlO8Q3OFgCcL59xP1KlSlsHXpzPwupZsz8nlioFT6Td8DtnbcuIdmosjTxbOuZ+RxK+ansgn/TpyZ6f6fPDlOs55/HOGTF5JrjdNHZc8WTjnflGZkoncd94pjO3bgeZ1TuCBUQu56NnJzPrm+3iH5o4xTxbOuajqpZZn6I2tee7qFmzesYdLn/+CP7zzJZu2e9PU8cKThXPukEjigtOq8+m9Hbk1ox4jZq3h7MeDUVP+QF/x58nCOXdYypVK4v5ujRnbtwMtT6rEv0Yv4tynMhm34Dtfoa8Y82ThnDsi9atWYMgNrRlyQytKJiZw62szueqlqSxY+0O8Q3Mx4MnCOXdUzj6lKmP6dOAfPZqy5LttdH9mEn9450s2bPOnwIsTTxbOuaOWlJjAdWem8fnvOnFTu7q8O3sNnR77nOfGL/O1M4oJTxbOuQJTsUwJ/ty9CR/d05F29VN4bNwSOj+Ryai5a70/o4jzZOGcK3B1U8oxsGc6b97ShuQyJbhr2Gx+/eIU5qzeEu/Q3BGKabKQ1EfSfEkLJPUNyx6Q9K2kOeGrW8T+90taJmmJpPMiyltKmhduGyBfdd65IuGsk1P44K72PHLZqXy9aScXPzeZe4bPYd0Pu+IdmjtMMUsWkpoBtwCtgdOB7pIahJufMrPm4Wt0uH8T4EqgKdAVeF5SYrj/C0AvoEH46hqruJ1zBSsxQVzRqg6f/+5s7jj7ZD6ct45Oj3/Okx9/xc49ufEOzx2iWNYsGgNTzWynmeUCmcAl+ezfA3jLzHLMbCWwDGgtqTqQbGZTLGj0HApcHMO4nXMxUL5UEr/v2ohP+3WkS+NqDPh0KZ0e/5wRM9ewf7/3ZxR2sUwW84EMSVUklQW6AbXDbXdK+lLSy5IqhWU1gdURx68Jy2qG7w8u/xlJvSRlScrKzs4uyHtxzhWQ2pXL8uzVLXjntjM5Mbk09/53Lhc/P5kZqzbHOzSXj5glCzNbBDwCfAyMBeYCuQRNSicDzYF1wBPhIXn1Q1g+5Xldc6CZpZtZempq6tHdgHMuptLTKvO/O9rx1BWns2FrDpe/OIXeb8xi9ead8Q7N5SGmHdxmNtjMWphZBrAZWGpm681sn5ntB14i6NOAoMZQO+LwWsDasLxWHuXOuSIuIUFcckYtPruvI306N+DTxevp/GQmj4xdzLbde+MdnosQ69FQVcOfdYBLgWFhH8QBlxA0VwG8D1wpqZSkugQd2dPNbB2wTVLbcBRUT2BkLON2zh1bZUsmcc+5DRl/39l0P7U6L3y+nE6Pf86w6d+wz/szCgXF8kEZSROBKsBeoJ+ZfSrpNYImKANWAbeGCQFJfwJuJGiu6mtmY8LydGAIUAYYA9xlUQJPT0+3rKysWNyWcy7G5qzewj8+WMjMr7+n0YkV+Gv3JpxVPyXeYR0XJM00s/SflRfXpyo9WThXtJkZH85bx0OjF/Ptll10aVyNP13QmLop5eIdWrH2S8nCn+B2zhVKkuh+Wg0+vbcjvzvvFKYs38ivnsrkHx8s5Ied3p9xrHmycM4VaqVLJNK7U33G/+5sLmtRi5cnr+Tsx8fz6her2OvrgR8zniycc0VC1Qqlefiy0/jgrvY0OjGZv72/gPP7T2T8kg0+SeEx4H0Wzrkix8z4eOF6/j16Eas27aR25TJ0blSNcxpVpU29ypRKSox+Epcn7+B2zhU7e3L387/Za/howXomLdtITu5+ypZMpEODFDo3qsbZjVKpWqF0vMMsUjxZOOeKtV179jFlxUY+XbSB8Ys3sPaHYKW+02tV5JxG1ejcuCpNayTjk1bnz5OFc+64YWYs/m4bny3ewKeL1jN79RbMoFpyKc5pVJVzGlWjXf0qlC2ZFO9QCx1PFs6549am7Tl8viSbzxZvIPOrbLbn5FIyKYGzTq5C50ZV6dSoKrUqlY13mIWCJwvnnCPo58hatZlPw1rHqk3BxIWNTqzAOY2q0rlxVZrXrkRiwvHZXOXJwjnn8rAie3vYXLWBGas2k7vfqFS2BJ1OCWocGQ1TqVimRLzDPGY8WTjnXBQ/7NrLxKXZfLZoA+OXbOD7nXtJTBCt0ioFQ3MbV+Xk1PLxDjOmPFk459xh2LffmLP6ez5dtIHPFm9g8XfbAEirUvb/Rle1SqtMyaTi9WyzJwvnnDsKa77fyfjFG/h08Qa+WL6JPbn7KV8qiYyGKZzTqBpnn5JKSvlS8Q7zqHmycM65ArJzTy6Tl23is8Ub+GzxetZvzSExQfwmvTZ9uzSgWnLRfRDQk4VzzsWAmbFg7Vb+m7WaN6d/Q2KCuKl9XW7teDLJpYtex7gnC+eci7GvN+3giY++4v25a6lUtgS9O9XnujNPKlJzVfl6Fs45F2MnVSnHgKvO4IO72tOsZkX++eEiznk8k3dnrSnyy8N6snDOuQLWrGZFXrupDa/f1IbK5UrS7+25XDCgaE+n7snCOedipH2DFEb2bsczV53Brr37uOGVGVz10lTmrN4S79AOW0yThaQ+kuZLWiCp70Hb7pNkklLCz2mSdkmaE75ejNi3paR5kpZJGiCfNtI5V0QkJIgLT6/Bx/d05O89mrJsw3Yufm4yd7wxkxXZ2+Md3iGL2ZSLkpoBtwCtgT3AWEkfmtlSSbWBc4FvDjpsuZk1z+N0LwC9gKnAaKArMCZWsTvnXEErmZRAzzPTuLRFLQZNXMFLE1YwbsF6rmxVmz6dG1C1kA+3jWXNojEw1cx2mlkukAlcEm57Cvg9ELXxTlJ1INnMpljQ2DcUuDhGMTvnXEyVL5VE3y4N+fx3nbi2TR2Gz1hNx8c+5/FxS9i6e2+8w/tFsUwW84EMSVUklQW6AbUlXQR8a2Zz8zimrqTZkjIldQjLagJrIvZZE5b9jKRekrIkZWVnZxfgrTjnXMFKrVCKB3s049N7O9KlSTWeHb+Mjo+OZ/CkleTk7ot3eD8Ts2RhZouAR4CPgbHAXCAX+BPw1zwOWQfUMbMzgH7Am5KSgbz6J/KskZjZQDNLN7P01NTUArgL55yLrZOqlOOZq85g1J3BcNt/fLCQcx7P5H+z17C/EA23jWkHt5kNNrMWZpYBbAZWAXWBuZJWAbWAWZJONLMcM9sUHjcTWA40JKhJ1Io4bS1gbSzjds65Y+3UWsFw29duak2lciW4Z/hcLnhmEp8XkuG2sR4NVTX8WQe4FBhqZlXNLM3M0ggSQQsz+05SqqTEcP96QANghZmtA7ZJahuOguoJjIxl3M45Fy8dGqTyfu/2DLjqDHbk5PLbV2Zw9UvTmBvn4baxXoB2hKQqwF6gt5l9n8++GcDfJeUC+4DbzGxzuO12YAhQhmAUlI+Ecs4VWwkJ4qLTa9C16YkMm/4NAz5dSo/nJnPBqdW577xTqJtS7pjH5HNDOedcIbc9J5eXJqzgpYkryMndH9Phtj6RoHPOFXHZ23J45rOlvDntG0okJnBzh7r0yqhHhQKc3daThXPOFROrNu7giY+/YtTctVQuV5I7O9XnmrZ1CmR2W5911jnniom0lB+H2zauXoG/f7CQzk9k8t7sb2M23NaThXPOFVGn1qrIGze35bWbWlOxTAn6Dp/DBc9MYv3W3QV+rViPhnLOORdjHRqk0u7kFEZ9uZYPv1xHagzWAvdk4ZxzxUBCgujRvCY9muc5G9LRnz8mZ3XOOVeseLJwzjkXlScL55xzUXmycM45F5UnC+ecc1F5snDOOReVJwvnnHNRebJwzjkXVbGdSFBSNvB1vOM4SinAxngHUUj4d/FT/n38lH8fPzra7+IkM/vZutTFNlkUB5Ky8pr98Xjk38VP+ffxU/59/ChW34U3QznnnIvKk4VzzrmoPFkUbgPjHUAh4t/FT/n38VP+ffwoJt+F91k455yLymsWzjnnovJk4ZxzLipPFoWMpNqSxktaJGmBpD7xjqkwkJQoabakD+IdS7xJOkHSO5IWh/+fnBnvmOJF0j3hv5P5koZJKh3vmI4lSS9L2iBpfkRZZUkfS1oa/qxUENfyZFH45AL3mlljoC3QW1KTOMdUGPQBFsU7iEKiPzDWzBoBp3Ocfi+SagJ3A+lm1gxIBK6Mb1TH3BCg60Fl/w/41MwaAJ+Gn4+aJ4tCxszWmdms8P02gl8EsVknsYiQVAu4ABgU71jiTVIykAEMBjCzPWa2Jb5RxVUSUEZSElAWWBvneI4pM5sAbD6ouAfwavj+VeDigriWJ4tCTFIacAYwLb6RxN3TwO+B/fEOpBCoB2QDr4TNcoMklYt3UPFgZt8CjwPfAOuAH8zso/hGVShUM7N1EPzxCVQtiJN6siikJJUHRgB9zWxrvOOJF0ndgQ1mNjPesRQSSUAL4AUzOwPYQQE1MxQ1YVt8D6AuUAMoJ+na+EZVfHmyKIQklSBIFG+Y2bvxjifO2gEXSVoFvAWcI+n1+IYUV2uANWZ2oLb5DkHyOB51AVaaWbaZ7QXeBc6Kc0yFwXpJ1QHCnxsK4qSeLAoZSSJoj15kZk/GO554M7P7zayWmaURdF5+ZmbH7V+PZvYdsFrSKWFRZ2BhHEOKp2+AtpLKhv9uOnOcdvYf5H3g+vD99cDIgjhpUkGcxBWodsB1wDxJc8KyP5rZ6DjG5AqXu4A3JJUEVgA3xDmeuDCzaZLeAWYRjCKczXE27YekYcDZQIqkNcDfgIeBtyXdRJBQLy+Qa/l0H84556LxZijnnHNRebJwzjkXlScL55xzUXmycM45F5UnC+ecc1F5snBFkiST9ETE5/skPVBA5x4i6dcFca4o17k8nDV2fCzjkpQm6erDj9C5H3mycEVVDnCppJR4BxJJUuJh7H4TcIeZdYpVPKE04LCSxWHehzsOeLJwRVUuwQNY9xy84eC/wCVtD3+eLSlT0tuSvpL0sKRrJE2XNE/SyRGn6SJpYrhf9/D4REmPSZoh6UtJt0acd7ykN4F5ecRzVXj++ZIeCcv+CrQHXpT0WB7H/D48Zq6kh/PYvupAopSULunz8H1HSXPC12xJFQge0uoQlt1zqPchqZykD8MY5ku64lD+w7jiyZ/gdkXZc8CXkh49jGNOBxoTTOu8AhhkZq3DRabuAvqG+6UBHYGTgfGS6gM9CWY2bSWpFDBZ0oFZTlsDzcxsZeTFJNUAHgFaAt8DH0m62Mz+Lukc4D4zyzromPMJppVuY2Y7JVU+jPu7D+htZpPDySh3E0w0eJ+ZHUh6vQ7lPiRdBqw1swvC4yoeRhyumPGahSuywtl4hxIsgHOoZoRrhuQAy4EDvyTnESSIA942s/1mtpQgqTQCfgX0DKdhmQZUARqE+08/OFGEWgGfh5Pd5QJvEKxHkZ8uwCtmtjO8z4PXK8jPZOBJSXcDJ4TXPNih3sc8ghrWI5I6mNkPhxGHK2Y8Wbii7mmCtv/INR1yCf/fDieYKxmxLSfi/f6Iz/v5aU374HlwDBBwl5k1D191I9ZP2PEL8elQb+SgY6LNw/N/9wj831KiZvYwcDNQBpgqqdEvnD/qfZjZVwQ1onnAQ2HTmTtOebJwRVr4V/fbBAnjgFUEv+QgWO+gxBGc+nJJCWE/Rj1gCTAOuD2cQh5JDQ9h4aFpQEdJKWGn8VVAZpRjPgJulFQ2vE5ezVCr+PEeLztQKOlkM5tnZo8AWQQ1om1AhYhjD+k+wia0nWb2OsEiQ8frVOgO77NwxcMTwJ0Rn18CRkqaTrAG8S/91Z+fJQS/1KsBt5nZbkmDCJqqZoU1lmyiLFlpZusk3Q+MJ/iLfrSZ5TtltJmNldQcyJK0BxgN/PGg3R4EBkv6Iz9dSbGvpE7APoKpy8cQ1JpyJc0lWLO5/yHex6nAY5L2A3uB2/OL2xVvPuusc865qLwZyjnnXFSeLJxzzkXlycI551xUniycc85F5cnCOedcVJ4snHPOReXJwjnnXFT/HzF1MpUm2hhDAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn.cluster import KMeans  # 使用方法详见： http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html\n",
    "wcss = []\n",
    "for i in range(1,11):\n",
    "    ############ 第四题 ～ 1行 （初始化 KMeans） ############ \n",
    "    ############ your code start ############     \n",
    "    kmeans = KMeans(n_clusters=i, random_state=0)\n",
    "    ############ your code end ############\n",
    "    kmeans.fit(X3)\n",
    "    wcss.append(kmeans.inertia_)\n",
    "plt.plot(range(1,11),wcss)\n",
    "plt.title('The Elbow Method')\n",
    "plt.xlabel('Number of clusters')\n",
    "plt.ylabel('WCSS')\n",
    "plt.savefig('elbow.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "2dd63611-35d6-4c6e-b076-c10e18a3b10c",
    "_uuid": "d5184cf4e47df719970d89c5ea8e15d4b9eaa1e5"
   },
   "source": [
    "由于可能产生多个肘点，所以有时候不得不通过反复试验来选择合适数量的簇。下面展示不同数量簇的结果，以找出合适数量的簇。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "_cell_guid": "e096b262-a06c-4f0a-9c50-2ef4bda9b926",
    "_uuid": "0ed982322b3a0fecb997e88ef0fb2f681c5a801c",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['death', 'debat', 'decid', 'decis', 'declar', 'defeat', 'defenc', 'defend', 'delay', 'demand', 'democrat', 'dengu', 'deni', 'dept', 'deputi', 'despit', 'destroy', 'detain', 'develop', 'die', 'differ', 'diplomat', 'disappoint', 'disast', 'discuss', 'dismiss', 'disput', 'divis', 'doctor', 'dog', 'dollar', 'domest', 'dont', 'dope', 'doubl', 'doubt', 'downer', 'draft', 'draw', 'drink', 'drive', 'driver', 'drop', 'drought', 'drug', 'dubai', 'dump', 'dure', 'e', 'earli']\n"
     ]
    }
   ],
   "source": [
    "print(words[250:300])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "a3c414e5-70b2-4c63-a46a-f9c82e406e4b",
    "_uuid": "6992f369b10d54adf27ecdcf24c6f57deabf466f"
   },
   "source": [
    "## 3.2 Clusters 等于 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "_cell_guid": "b395dddc-8acb-40a8-825e-a6f3a615809e",
    "_uuid": "e0e187a022ec9032385f715d87cfbb865a11698d",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : polic, man, plan, govt, new, charg, murder, face, court, vic, probe, stab, wa, death, nsw, arrest, sa, hospit, car, drug, jail, search, urg, water, qld\n",
      "1 : win, council, iraqi, say, claim, baghdad, warn, report, world, kill, fund, rain, cup, urg, water, lead, set, death, nsw, crash, troop, continu, open, final, hope\n",
      "2 : iraq, war, protest, anti, say, howard, pm, ralli, missil, troop, deni, post, blair, bush, warn, downer, forc, fear, plan, destroy, british, condemn, turkey, student, tv\n",
      "9679.422087686096\n"
     ]
    }
   ],
   "source": [
    "kmeans = KMeans(n_clusters = 3, n_init = 20, n_jobs = 1) # n_init(number of iterations for clsutering) n_jobs(number of cpu cores to use)\n",
    "kmeans.fit(X3)\n",
    "# We look at 3 the clusters generated by k-means.\n",
    "# argsort 使用方法详见： https://numpy.org/doc/stable/reference/generated/numpy.argsort.html\n",
    "common_words = kmeans.cluster_centers_.argsort()[:,-1:-26:-1]\n",
    "for num, centroid in enumerate(common_words):\n",
    "    print(str(num) + ' : ' + ', '.join(words[word] for word in centroid))\n",
    "print(kmeans.inertia_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "05f80701-83af-49c1-a7b8-df8c20396bb9",
    "_uuid": "4750051c6cbdc6d262f31a94f5c4776eb020644a"
   },
   "source": [
    "## 3.3 Clusters 等于 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "_cell_guid": "137b7e2c-970f-4f2f-929e-c7b2bd2a8004",
    "_uuid": "2813cee7e61b966b179b806d29b222c55551ee35",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : iraq, war, protest, anti, say, howard, pm, missil, ralli, troop, deni, blair, post, bush, forc, warn, fear, plan, destroy, british, condemn, turkey, student, downer, oil\n",
      "1 : win, lead, season, fan, m, goal, india, award, open, hope, tiger, championship, world, gold, return, titl, thriller, cup, coast, stage, streak, best, case, celebr, waratah\n",
      "2 : new, council, secur, plan, resolut, elect, hope, fund, land, appoint, ceo, presid, consid, seek, welcom, water, develop, rate, offer, iraq, work, law, help, open, look\n",
      "3 : govt, vic, nsw, sa, urg, fund, qld, wa, local, consid, claim, reject, fed, nt, tas, opp, say, plan, reform, deni, hospit, defend, review, issu, crean\n",
      "4 : polic, man, plan, iraqi, charg, say, warn, claim, baghdad, kill, death, court, report, face, world, water, rain, crash, miss, fund, urg, cup, set, nsw, lead\n",
      "9597.427044588552\n"
     ]
    }
   ],
   "source": [
    "############ 第五题 ############ \n",
    "# 重复上面的做法，仅改变聚类簇的数量 (尝试将上述过程封装成一个函数，减少代码的重读性，方便传参调用)\n",
    "def get_kmeans_learning_results(data, n_clusters=3, n_init=20, n_jobs=1):\n",
    "    \"\"\"KMeans 学习函数\"\"\"\n",
    "    kmeans = KMeans(n_clusters=n_clusters, n_init=n_init, n_jobs=n_jobs)\n",
    "    kmeans.fit(data)\n",
    "    common_words = kmeans.cluster_centers_.argsort()[:, -1:-26:-1]\n",
    "    for num, centroid in enumerate(common_words):\n",
    "        print(str(num) + ' : ' + ', '.join(words[word] for word in centroid))\n",
    "    print(kmeans.inertia_)\n",
    "        \n",
    "get_kmeans_learning_results(X3, n_clusters=5)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "e5ab0cdc-1178-486d-a982-3457aa69d234",
    "_uuid": "4faeeea430f9f679a594b2c7f1ef1c857611ade5"
   },
   "source": [
    "## 3.4 Clusters 等于 6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "_cell_guid": "de4d9fbb-2462-4491-899f-b1e28e1a7697",
    "_uuid": "73c1c5bfafe12fd9bd57ab0c3bd07f60630f05a8"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : iraq, polic, new, say, probe, missil, troop, pm, kill, attack, resolut, death, wa, govt, arrest, drug, deni, warn, report, work, investig, car, murder, howard, destroy\n",
      "1 : man, charg, murder, court, face, polic, stab, jail, miss, death, assault, arrest, u, search, accid, hospit, car, attack, die, plane, injur, fatal, sex, convict, child\n",
      "2 : war, protest, anti, iraq, howard, ralli, pm, post, say, student, fear, condemn, bush, market, thousand, march, downer, iraqi, warn, deni, start, end, stage, peac, open\n",
      "3 : win, govt, iraqi, claim, warn, baghdad, report, world, kill, nsw, urg, rain, fund, cup, lead, death, set, water, crash, open, court, final, continu, face, troop\n",
      "4 : council, secur, elect, fund, land, plan, seek, welcom, consid, water, chang, urg, manag, decis, park, develop, studi, offer, hold, iraq, vote, centr, claim, poll, meet\n",
      "5 : plan, water, shire, new, govt, park, manag, protest, firm, green, begin, group, m, welcom, war, defend, health, rail, land, farmer, station, burn, concern, opp, carr\n",
      "9549.98128601541\n"
     ]
    }
   ],
   "source": [
    "# 同上\n",
    "get_kmeans_learning_results(X3, n_clusters=6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "bff92434-ec9d-4e6b-a4a2-72ec4ea6f3bc",
    "_uuid": "7225da580975f8589615a8fd5232ba3c80780845"
   },
   "source": [
    "## 3.5 Clusters 等于 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "_cell_guid": "4134a6d2-09aa-4cf5-9821-d95d0482c4ac",
    "_uuid": "cf3c5af56aa05f6679effe85774207fe824255a1"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 : win, new, plan, iraqi, claim, baghdad, warn, world, report, water, rain, cup, kill, urg, lead, fund, set, open, hope, nsw, concern, hit, final, continu, court\n",
      "1 : crash, investig, begin, plane, fatal, car, helicopt, kill, die, probe, death, chopper, hospit, truck, underway, pilot, dead, road, inquest, highway, caus, man, inquiri, woman, bus\n",
      "2 : iraq, say, missil, troop, pm, deni, attack, howard, destroy, soldier, bomb, blair, report, kill, british, baghdad, warn, aid, bush, downer, forc, oil, iraqi, turkey, blix\n",
      "3 : council, secur, elect, fund, land, plan, seek, welcom, consid, water, chang, urg, manag, decis, park, develop, studi, offer, hold, iraq, vote, centr, claim, poll, meet\n",
      "4 : war, protest, anti, iraq, howard, ralli, pm, post, student, plan, fear, condemn, iraqi, market, thousand, march, say, bush, deni, downer, start, end, stage, peac, open\n",
      "5 : polic, probe, arrest, search, death, man, murder, investig, drug, cannabi, station, wa, miss, victim, car, stab, protest, charg, fatal, suspect, driver, nt, road, bodi, shoot\n",
      "6 : govt, vic, nsw, sa, urg, fund, qld, wa, local, consid, claim, reject, fed, nt, tas, opp, say, plan, reform, deni, hospit, defend, review, issu, crean\n",
      "7 : man, charg, face, court, murder, stab, jail, assault, death, accid, u, miss, fraud, child, attack, guilti, sex, car, offenc, hospit, rape, teen, drug, attempt, fatal\n",
      "9484.50526734011\n"
     ]
    }
   ],
   "source": [
    "# 同上\n",
    "get_kmeans_learning_results(X3, n_clusters=8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "_cell_guid": "a168498b-1322-4010-b462-596bc0ba186b",
    "_uuid": "4717568e34623b3b88f033670c20f01f7ddd63a6"
   },
   "source": [
    "最后，我们可以根据不同簇数量产生的聚类结果，人工判断哪一个聚类的效果最好"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEWCAYAAACXGLsWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dd5xU1f3/8dd7G0uvi5QFRER6EVYsCEblG1GxYEUx2DGKBaMppppvfuYrRmOCBWMPoggGayyxQpS+KL0ISO+9iMAu+/n9ce/quC47zLDDbPk8H4957MyZe8987uzsfuacc+85MjOcc865kqQkOwDnnHNlnycL55xzUXmycM45F5UnC+ecc1F5snDOOReVJwvnnHNRebJwzjkXlScLl3CSTNKxB3luoKT3D6GOeyWNiuE1d0s6JpY4D1LP85L+3+HWE9Y1XtINpVFXMpXme+LKD08WLiaS7pH0TpGyxQcpGxCtPjN70cx+XNpxmlkNM/sqln0kXSPps9KOJZHKY8wAkn4kaXWy43CHzpOFi9V/gZ6SUgEkNQLSgW5Fyo4Nt3Wu1ElKS3YMlY0nCxer6QTJoWv4uDfwCbCoSNlSM1sbsV+fsLWxTdJjkgQ//GYsqYOkDyRtlbRB0q8j6siQNFLSLknzJOUcLMjIri9J50iaH+63RtLdxWzfDngCODnswtoe8XRdSW+H+0+V1Cpiv7YR8S6SdFmU96+VpGmSdkh6Q1K9iLpOkjRJ0nZJsyT9KOK5ayR9FcawLOy+KynmyGMbL+lPkiaG+78vqUGUOJF0akQ8qyRdU8w2P2jZRHvvJVUH3gWahHHvltREUoqkX0laKmmLpLGF74+ko8N6r5e0Evg4WvyudHmycDExs/3AVIKEQPjzU+CzImVFWxX9gBOALsBlwFlF65ZUE/gQeA9oQtA6+Shik/OBl4E6wJvAo4cY9jPATWZWE+hIMf9ozGwB8FNgctiFVSfi6SuAPwJ1gSXAfWG81YEPgJeAhuF2j0vqUEIsg4DrwuPLB4aHdTUF3gb+H1APuBsYJykrfJ3hwNnhMZwCzIwSc1FXAteGcWaE9R+UpOYE/9AfAbIIvgjMLGmfg/jBe29mXwNnA2vDuGuEXyxuBy4ETiN4f7YBjxWp7zSgHcV8flxiebJw8ZjAd4mhF0Gy+LRI2YQi+9xvZtvNbCVBS6QrP9QPWG9mD5nZXjPbZWZTI57/zMzeMbMDwAsEiedQ5AHtJdUys21m9vkh7lfoVTObZmb5wIsRsfcDlpvZc2aWH9Y7DrikhLpeMLO54T/M3wGXhd13VwHvhMdXYGYfALnAOeF+BUBHSVXNbJ2ZzYvxGJ4zsy/N7BtgLMW//5EGAh+a2WgzyzOzLWYWT7KI5b2/CfiNma02s33AvcAlRbqc7jWzr8PjcEeQJwsXj/8Cp0qqC2SZ2WJgEnBKWNaRH7Ys1kfc3wPUKKbeZsDSEl63aB2Zh9h3fTHBP90VkiZIOvkQ9inpdQtjbwGcGHbTbA+7gQYCjUqoa1XE/RUEXXoNwrouLVLXqUDjMLFcTtCKWBd2ibUtpWM4mGi/i0MVy3vfAngt4vgXAAeAoyK2WVXsni7hPFm4eEwGagODgYkAZrYTWBuWrTWzZXHUuwpoFXWrGJnZdDO7gKAL5nWCb9bFbhpj1auACWZWJ+JWw8xuLmGfZhH3mxN8894c1vVCkbqqm9n94TH8x8z+B2gMLASeijPmQ3Wov4uvgWqFD8KTG75VwntfXNyrCLraIt+DTDNbE1llLAfhSo8nCxezsAsgF/gZQfdToc/CsnjPgvo30EjSUElVJNWUdOLhxCopIxwMrm1mecBOgm+rxdkAZEvKiCHe4yT9RFJ6eDshHHg+mKsktZdUDfhf4F9ht9oo4DxJZ0lKlZSp4PTSbElHSTo/HLvYB+yOOIZYYz5ULxKclHCZpDRJ9SUV13U1C+ggqaukTIKuIyDqe78BqC+pdkRdTwD3SWoR7p8l6YJSPi4XJ08WLl4TCL4tRp4J82lYFleyMLNdwP8A5xF0mywGTj+8MAH4CbBc0k6CrpyrDrLdx8A8YL2kzYcY74+BAQStqvXAMKBKCbu9ADwfbptJMKiLma0CLgB+DWwi+Jb9c4K/0RTgrvA1thIM8t4ST8yHKhxbOid83a0Eg9s/GCMysy8Jkt6HBL+votd8FPvem9lCYDTwVdjt1AT4O8GJC+9L2gVMAQ7ry4IrPfKV8pxzzkXjLQvnnHNRebJwrpIKxxN2F3OL9bRcVwl4N5RzzrmoKuz8Kg0aNLCjjz462WE451y5MmPGjM1mllW0vMImi6OPPprc3Nxkh+Gcc+WKpBXFlfuYhXPOuagSmiwk3SFproIZQoeGZV0lTZE0U1KupB4R298jaYmC2TvPiijvLmlO+NxwKZix1Dnn3JGRsGQhqSNwI9CD4GKefpJaAw8AfzSzrsDvw8dIak9wcVMHoC/B7J2pYXUjCKaRaB3e+iYqbueccz+UyJZFO2CKme0JZ+ucAPQnmNulVrhNbYKrUiG4evVlM9sXziu0BOghqTFQy8wmW3Dq1kiCaYydc84dIYkc4J5LMM9LfeAbgqkDcoGhwH8kPUiQrE4Jt29KcHl/odVhWV54v2j5D0gaTNACoXnz5qV2IM45V9klrGURLswyjGBxmPcIJhzLB24G7jSzZsCdBIujABQ3DmEllBf3mk+aWY6Z5WRl/eDML+ecc3FK6AC3mT1jZt3MrDfBZGSLgauBV8NNXiEY04CgxRA5fXM2QRfV6vB+0XLnnHNHSKLPhmoY/mwOXEQwy+RaglkzAc4gSCAQzDY5IJyauiXBQPY0M1sH7FKwPrEIlqV8I1Exj5m+ko8XbkhU9c45Vy4l+qK8ceGYRR4wxMy2SboR+Hu4wtlewjEGM5snaSwwn6C7akg4zz8EXVfPA1UJ1gV+NxHB5h0oYOTkFazcsofXb+1Jq6xoi4k551zlUGHnhsrJybF4ruBevW0P5z86kTrV0nl9SE9qZaYnIDrnnCubJM0ws5yi5X4FdxHZdavx2JXdWLFlDz8bM5OCgoqZTJ1zLhaeLIpxcqv6/L5fez5csJGHP/wy2eE451zSebI4iEEnt+DS7tk88vES3p2zLtnhOOdcUnmyOAhJ/OnCjnRtVoe7XpnFwvU7kx2Sc84ljSeLEmSmp/KPn3SnepU0Bo+cwfY9+5MdknPOJYUniyiOqpXJE1d1Z/2Ovdw2+gvyDxQkOyTnnDviPFkcgu4t6vK/F3Tg08WbeeA/i5IdjnPOHXEVdqW80jagR3Pmrd3Jk//9ivaNa3Hh8cXOZeiccxWStyxi8Pvz2tOjZT1+OW42c1bvSHY4zjl3xHiyiEF6agqPD+xG/eoZ3PRCLpt370t2SM45d0R4sohRgxpV+MdPctjy9X5uefFz8nzA2zlXCXiyiEOn7NoMu7gz05Zt5U//np/scJxzLuF8gDtOFx7flHlrd/DUp8vo0KQWl5/gK/M55youb1kchl/2bUuv1g343evzmLFiW7LDcc65hPFkcRjSUlN45IrjaVQ7k5tHzWDDzr3JDsk55xLCk8VhqlMtgycHdWf3vnxuemEGe/MORN/JOefKGU8WpaBto1o8dGkXZq7azu/fmEtFXVDKOVd5ebIoJWd3asxtZxzL2NzVjJy8ItnhOOdcqfJkUYru7HMcZ7ZtyP/+ez4Tl2xOdjjOOVdqPFmUopQU8fCArhzToDo3/DPXE4ZzrsLwZFHKamWm89KNJ9G8XjWufX46nyzamOyQnHPusHmySICsmlUYPfgkjs2qwU0jZ/DB/A3JDsk55w6LJ4sEqVc9g9E3nkS7xjW5edQM3vF1vJ1z5ZgniwSqXS2dUTecSNdmdbj1pc95/Ys1yQ7JOefiktBkIekOSXMlzZM0NCwbI2lmeFsuaWbE9vdIWiJpkaSzIsq7S5oTPjdckhIZd2mqmZnOP6/rQY+W9bhz7EzGTl+V7JCccy5mCUsWkjoCNwI9gC5AP0mtzexyM+tqZl2BccCr4fbtgQFAB6Av8Lik1LC6EcBgoHV465uouBOhepU0nrumB6ce24BfjJvNqCl+HYZzrnxJZMuiHTDFzPaYWT4wAehf+GTYOrgMGB0WXQC8bGb7zGwZsAToIakxUMvMJltwafRI4MIExp0QVTNSeWpQDme2bchvX5/LM58tS3ZIzjl3yBKZLOYCvSXVl1QNOAdoFvF8L2CDmS0OHzcFIvtoVodlTcP7Rct/QNJgSbmScjdt2lRKh1F6MtNTGXFVd/p2aMSf/j2fEeOXJjsk55w7JAlLFma2ABgGfAC8B8wC8iM2uYLvWhUAxY1DWAnlxb3mk2aWY2Y5WVlZccWdaBlpKTx65fGc36UJw95byN8/XOxzSTnnyryELn5kZs8AzwBI+jNhC0FSGnAR0D1i89V8v+WRDawNy7OLKS+30lJTePjyrmSkpfDwh1+y/8AB7v5xG8rRuL1zrpJJ9NlQDcOfzQmSQ2FLog+w0Mwiu5feBAZIqiKpJcFA9jQzWwfsknRSOM4xCHgjkXEfCakp4oGLO3NFj+Y89slS7nt7gbcwnHNlVqKXVR0nqT6QBwwxs8Ll5Abw/S4ozGyepLHAfILuqiFmVrg4xM3A80BV4N3wVu6lpIg/9+9IlbQUnv5sGfvyC/jj+R1ISfEWhnOubEl0N1Svg5Rfc5Dy+4D7iinPBTqWanBlhCT+cF57qqSl8I//fkXegQLu69+JVE8YzrkyJNEtC3cIJPGrs9tSJS2F4R8vYX9+AQ9c0pm0VL/A3jlXNniyKCMk8bMftyEjLYUH3/+S/QcKePjyrqR7wnDOlQGeLMqYW89oTUZaCn9+ZyGbd+9j+BXH07BmZrLDcs5Vcv61tQwa3LsVf70sWNP73OGfMfWrLckOyTlXyXmyKKMu6pbN60N6UrNKGlc+PZUnJiz1U2udc0njyaIMa9uoFm/c2pO+HRpx/7sLuXHkDHZ8k5fssJxzlZAnizKuZmY6j155PH84rz3jF22k3yOfMnfNjmSH5ZyrZDxZlAOSuLZnS8bcdDL5B4yLRkzipakrvVvKOXfEeLIoR7q3qMvbt/fixJb1+PVrc7hr7Cz27M+PvqNzzh0mTxblTL3qGTx/bQ/u7HMcr81cw4WPTWTppt3JDss5V8F5siiHUlPEHX1aM/K6HmzevZ/zH/mMf88u1xPxOufKOE8W5Viv1lm8ffuptGlUk1tf+oJ735zH/vyCZIflnKuAPFmUc41rV2XMTSdz/akteX7Sci77x2TWbP8m2WE55yoYTxYVQHpqCr/r154RA7uxZONuzh3+KeMXbUx2WM65CsSTRQVydqfGvHXbqTSqlcm1z0/nr+8v4kCBn17rnDt8niwqmJYNqvPaLT25uFs2wz9ewrXPT2f7nv3JDss5V855sqiAqmak8uClXfi/izoxZekWznv0M+av3ZnssJxz5Zgniwrsih7NGXPTSeTlGxeNmMjrX6xJdkjOuXLKk0UFd3zzurx126l0zq7D0DEz+eNb88g74KfXOudi48miEsiqWYUXbziRa3sezXMTl3PV01PZvHtfssNyzpUjniwqifTUFP5wXgcevjxYVOm8Rz5j5qrtyQ7LOVdOeLKoZPofn824m08hNUVc9sRkxkxfmeyQnHPlgCeLSqhj09q8deupnHhMPX45bg6/fm0O+/IPJDss51wZltBkIekOSXMlzZM0NKL8NkmLwvIHIsrvkbQkfO6siPLukuaEzw2XpETGXRnUDWevvflHrXhp6koGPDmF9Tv2Jjss51wZlbBkIakjcCPQA+gC9JPUWtLpwAVAZzPrADwYbt8eGAB0APoCj0tKDasbAQwGWoe3vomKuzJJTRG/7NuWxwd2Y9H6XfR75DOmL9+a7LCcc2VQIlsW7YApZrbHzPKBCUB/4GbgfjPbB2BmhZMYXQC8bGb7zGwZsAToIakxUMvMJluwNNxI4MIExl3pnNOpMa8P6UnNzDSueHIK/5y03Ffhc859TyKTxVygt6T6kqoB5wDNgOOAXpKmSpog6YRw+6bAqoj9V4dlTcP7Rct/QNJgSbmScjdt2lTKh1OxHXdUTV4f0pPTjsviD2/O465XZrE3z8cxnHOBhCULM1sADAM+AN4DZgH5QBpQFzgJ+DkwNhyDKG4cwkooL+41nzSzHDPLycrKOvyDqGRqV03nqUE5DO3Tmlc/X8MlT0xi9bY9yQ7LOVcGJHSA28yeMbNuZtYb2AosJmgZvGqBaUAB0CAsbxaxezawNizPLqbcJUBKihja5zieuTqHFVv2cN4jnzH1qy3JDss5l2SJPhuqYfizOXARMBp4HTgjLD8OyAA2A28CAyRVkdSSYCB7mpmtA3ZJOilsgQwC3khk3A7ObHcUb956KvWqZ/CTZ6bx5izPz85VZom+zmKcpPnAW8AQM9sGPAscI2ku8DJwddjKmAeMBeYTdFsNMbPCTvObgacJBr2XAu8mOG5HMN35uJtPoWvzOtw++gtGjF/qA9/OVVKqqH/8OTk5lpubm+wwKoR9+Qe4+5XZvDVrLVee2Jz/Pb8Daal+PadzFZGkGWaWU7Q8LRnBuPKlSloqf7+8K9l1qzJi/FLWbf+GR6/sRvUq/vFxrrLwr4fukKSEF/Dd178jE77cxOVPTmbjTr/i27nKwpOFi8nAE1vw9NU5fLXpa/o/PonFG3YlOyTn3BHgycLF7Iy2RzFm8MnsP1DARSMmMXmpn1rrXEXnycLFpVN2bV675RSOqpXJoGen+pKtzlVwnixc3LLrVmPcT0+he4u6DB0zk8c+WeKn1jpXQXmycIeldrV0/nldDy7o2oS//GcRv35tDvm+xrdzFY6f++gOW5W0VP52eVea1a3Go58sYe32vTw2sBs1/NRa5yoMb1m4UiGJu89qw/9d1InPlmzmsicms8FPrXWuwvBk4UrVFT2a8/TVOazY8jX9H5vIovV+aq1zFYEnC1fqTm/TkLE/PZn8AuOSEZOYtGRzskNyzh0mTxYuITo0qc1rQ3rSuE4mP3l2GkNe+pxJSzf72VLOlVM+AukSpmmdqrzy01N45KPFvDJjNW/PXkerrOoMPLEFF3fLpna19GSH6Jw7RD7rrDsi9uYd4O3Z6xg1dQVfrNxOZnoK53VuwsCTWtAluzbBUiXOuWQ72KyznizcETdv7Q5enLqS179Yw579B+jYtBYDT2zBBV2bUC3DG7vOJZMnC1fm7Nqbx+sz1/LilBUsXL+LmlXS6N+tKQNPbEGbRjWTHZ5zlZInC1dmmRmfr9zGqCkreXv2OvYfKOCEo+ty1Ukt6NuxEVXSUpMdonOVhicLVy5s/Xo//5qxihenrmTFlj3Uq57BpTnZXNmjOS3qV092eM5VeJ4sXLlSUGBMXLqZUVNW8OGCjRSYcUabhlzbsyU9j63vA+LOJYgnC1durd+xl5emreSlqSvYvHs/xx1Vg2tOaUn/45tSNcO7qJwrTZ4sXLm3N+8A/569jucmLmPe2p3UrprOgB7NGHTy0TStUzXZ4TlXIcSVLCSdAKwys/Xh40HAxcAK4F4z25qgeA+bJ4uKy8yYvnwbz01cxn/mrUcSZ3U4imt7tiSnRV3vonLuMMSbLD4H+pjZVkm9gZeB24CuQDszuyRRAR8uTxaVw+pte3hh8gpGT1vJzr35dGxai2tPaUm/Lo39LCrn4nCwZBFtbqjUiNbD5cCTZjbOzH4HHHsIL3qHpLmS5kkaGpbdK2mNpJnh7ZyI7e+RtETSIklnRZR3lzQnfG64/KujC2XXrcY957Rjyq/P5L7+HdmbV8Bdr8yi5/2f8PAHX7Jxl0+T7lxpiJosJBVeUnsm8HHEcyVeaiupI3Aj0APoAvST1Dp8+mEz6xre3gm3bw8MADoAfYHHJRV+NRwBDAZah7e+h3JwrvKolpHGwBNb8MGdvXnh+h50zq7N3z9aTM/7P+ZnY2YyZ/WOZIfoXLkWbW6F0cAESZuBb4BPASQdC0T762sHTDGzPeE+E4D+JWx/AfCyme0DlklaAvSQtByoZWaTw3pGAhcC70Z5fVcJSaJX6yx6tc5i2eav+eek5bySu4pXv1hDz2Pr88RV3amZ6RMYOherElsWZnYfcBfwPHCqfTfAkUIwdlGSuUBvSfUlVQPOAZqFz90qabakZyXVDcuaAqsi9l8dljUN7xct/wFJgyXlSsrdtGlTlPBcRdeyQXXuPb8Dk399Jr85px1TvtrKbaO/8DXCnYtDicki/Cc/w8xeM7OvJbWRdCfQ0cw+L2lfM1sADAM+AN4DZgH5BF1KrQgGydcBDxW+XHHVlFBe3Gs+aWY5ZpaTlZVVUniuEqmVmc6NvY/hj+d3YPyiTdz3zoJkh+RcuRNtzOI94Gj4tutpMnAMMETS/0Wr3MyeMbNuZtYb2AosNrMNZnbAzAqApwjGNCBoMTSL2D0bWBuWZxdT7lxMrjqpBdf2PJrnJi5n1JQVyQ7HuXIlWrKoa2aLw/tXA6PN7DbgbKBftMolNQx/NgcuAkZLahyxSX+C7iqAN4EBkqpIakkwkD3NzNYBuySdFJ4FNQh449AOz7nv++257Tm9TRZ/eHMeny325V6dO1TRkkVkd88ZBF1KmNl+4FA6fsdJmg+8BQwxs23AA+FpsLOB04E7wzrnAWOB+QQtmiFmdiCs52bgaWAJsBQf3HZxSk0Rw684nmOzanDzizNYsnF3skNyrlyIdlHeKGA9sAb4FdDSzPZIqgNMMLMuRybM2PlFea4kq7bu4cLHJlIjM43Xb+lJ3eoZyQ7JuTIh3ovybgQ2E4xb/LjwNFigPfBgqUbo3BHUrF41nhzUnXU79vLTUTPYn+9nSDlXkmjJogbwlpndYWazIsp3EnQVOVdudW9Rjwcu7szUZVv57etzqKiTajpXGqIli0eABsWUNwX+XvrhOHdkXXh8U24/41jG5q7mqU+/SnY4zpVZ0ZJFJzObULTQzP4DdE5MSM4dWUP7HMe5nRrzf+8u5IP5G5IdjnNlUrRkUdK8CD5ngqsQUlLEg5d2oXPT2tzx8hfMW+vzSDlXVLRksThyVthCks4GvM3uKoyqGak8NSiH2lXTueGfuWzc6bPVOhcpWrIYCvxN0vOSbgtv/yQYr7gj8eE5d+Q0rJXJ01fnsOObPG4cmcvevAPRd3KukoiWLM4FrgcmAi3C2wSgs5l9meDYnDviOjSpzd8u78rsNTu465VZFBT4GVLOQfRkkU0wGeADQA6wH9gAVEtwXM4lzY87NOJXfdvy9ux1/O2jxdF3cK4SKHE9CzO7G0BSBkGyOAW4DnhK0nYza5/4EJ078gb3PoYlG3cz/KPFtMqqzgVdi50V37lKI9riR4WqArWA2uFtLTAnUUE5l2ySuK9/J1Zs3cPP/zWb7LrV6N6ibvQdnaugoq1n8aSkicAY4GRgEnBpuGbEtUciQOeSJSMthX9c1Z3GtTO56YVcVm/bE30n5yqoaGMWzYEqfDeZ4Gpge6KDcq6sqFs9g2euPoF9+QVc/3wuu/bmJTsk55KixFlnAcI1JDoQjFecAnQkWMhospn9IeERxslnnXWl6bPFm7n6uWm0bliDto1qUjMznRqZadTMTKNmZjq1MtOoUSW4XzO8XyvcJjWluMUenSubDjbrbNQxi3Dd7bmStgM7wls/ghXuymyycK40ndq6AQ9d2oWnPv2KGSu3sXtvPrv25pN/CKfWVs9IDRNLkEiOb1aXX/RtQ2Z66hGI3LnSEW09i9sJWhM9gTyC6y0mhz/nhEujlknesnCJZmbszStg1948du0LkseuvXnfJpKde/PYHVG+a28+2/bsZ8pXW+nWvA7/+EkOWTWrJPswnPueeFsWRwP/Au4Mlzd1zoUkUTUjlaoZqTSMYb+3Z6/jrldmcuFjE3lqUA7tm9RKWIzOlZYSB7jN7Gdm9i9PFM6VnnM7N+aVm07hQIFxyROT+M+89ckOybmoop0N5ZxLgE7ZtXnj1p60bliDn46awePjl/jiS65M82ThXJIcVSuTMTedzLmdGvPAe4u4a+wsn7zQlVmHegW3cy4BMtNTeeSK42ndsCYPf/gly7d87QPfrkzyloVzSSaJO/q05rEruzF/3U4ufGwi89fuTHZYzn2PJwvnyojCge/8ggIueWIS7/vAtytDPFk4V4Z0yq7Nm7eeyrENa3DTqBmMGL/UB75dmZDQZCHpDklzJc2TNLTIc3dLMkkNIsrukbRE0iJJZ0WUd5c0J3xueDgFiXMV0lG1Mhkz+GTO6dSYYe8t5K5XZrEv3we+XXIlLFlI6gjcSDAtSBegn6TW4XPNgP8BVkZs3x4YQDAPVV/gcUmF8yGMAAYDrcNb30TF7VxZUDUjlUevOJ6hfVrz6udruPKpqWzevS/ZYblKLJEti3bAFDPbY2b5BMux9g+fexj4BRDZvr4AeNnM9pnZMmAJ0ENSY6CWmU0O56kaCVyYwLidKxMkMbTPcTx2ZTfmrd3BBY9OZME6H/h2yZHIZDEX6C2pvqRqwDlAM0nnA2vMbFaR7ZsCqyIerw7Lmob3i5b/gKTBknIl5W7atKm0jsO5pDq3c2PG3nQy+QUFXDxiEh/M35DskFwllLBkYWYLCNbv/gB4D5gF5AO/AX5fzC7FjUNYCeXFveaT4cJMOVlZWXHF7VxZ1Dm7Dm8MCQa+B7+Qy5/fWeCLMbkjKqED3Gb2jJl1M7PeBGtgLAdaArMkLQeygc8lNSJoMTSL2D2bYPnW1eH9ouXOVSqNagcD3/2Pb8pTn35Frwc+4epnp/He3HXkHSizE0C7CiLq4keHVbnU0Mw2SmoOvA+cbGbbIp5fDuSY2WZJHYCXCAbEmwAfAa3N7ICk6cBtwFTgHeARM3unpNf2KcpdRbZ62x7G5q5m7PRVrN+5lwY1qnBpTjYDTmhGi/rVkx2eK8cONkV5opPFp0B9grUwfmZmHxV5fjlhsggf/wa4jqC7aqiZvRuW5wDPA1WBd4HbLErgnixcZZB/oIAJX25i9LSVfLxwIwUGPY+tzxU9mvPj9o3ISPNLqVxskpIsksmThats1u34hldyVzNm+irWbP+GetUzuKR70No4JqtGssNz5YQnC+cqiQMFxhBWSEsAABZwSURBVKeLN/HytFV8uGAD+QXGiS3rcUWP5vTt2MiXc3Ul8mThXCW0cdde/jVjNS9PW8XKrXuoUy2d/sc35YoezTnuqJrJDs+VQZ4snKvECgqMyV9t4aVpK3l/3nryDhg/apPFH87rQMsGPiDuvuPJwjkHwJbd+xiTu4rHP1nK/vwCfnraMdxy+rHePeWAgycLP1XCuUqmfo0q3PKjY/n4rtM4u1Mjhn+8hD5/ncCHfmW4K4EnC+cqqYa1Mvn7gON56cYTyUxP5YaRudzwz+ms2upXhrsf8mThXCV3SqsGvHN7L+45uy2Tlm6hz18n8MhHi31adPc9niycc2SkpXDTaa346K7T6NPuKB764EvOevi/TPjSJ+R0AU8WzrlvNa5dlccGdmPkdT1Ikbj62WncPGoGa7d/k+zQXJJ5snDO/UDv47J4d2gvfn5WGz5ZtJEzH5rAiPHB2VOucvJk4ZwrVpW0VIacfiwf3HkavVo3YNh7Czln+KdMWrI52aG5JPBk4ZwrUbN61XhyUA7PXpPD/vwCrnx6KreN/oINO/cmOzR3BKUlOwDnXPlwRtujOKVVA0aMX8qICUv5ZOFGLs3J5oy2DenRsh5V0vyivorMr+B2zsVsxZavGfbeQj5asJF9+QVUy0jllFYNOL1tFqe3aUiTOlWTHaKL08Gu4PaWhXMuZi3qV+fxgd35Zv8Bpny1hU8WbeTjhRv5cEFwFXjbRjX5UZuGnN4mi24t6pKe6j3e5Z23LJxzpcLMWLppN58s3MQnizYyfflW8g4YNTPT6N06ix+1yeK0Nlk0rJmZ7FBdCXwiQefcEbVrbx4Tl2xh/KKNfLJoIxt27gOgU9PanN4mi9PbNqRzdh1SU5TkSF0kTxbOuaQxMxas28UnizYyftFGZqzYRoFBveoZ/OG89lzQtWmyQ3QhH7NwziWNJNo3qUX7JrUYcvqxbN+zn08Xb+a5icv42dhZ1MpM5/S2DZMdpiuBjzo55464OtUyOK9LE0ZefyLtGtfklhc/54uV25IdliuBJwvnXNLUqJLGc9f0oGGtKlz3/HSWbtqd7JDcQXiycM4lVVbNKoy8rgepKWLQM9P8yvAyypOFcy7pWtSvznPX9GD7nv1c/ew0du7NS3ZIroiEJgtJd0iaK2mepKFh2Z8kzZY0U9L7kppEbH+PpCWSFkk6K6K8u6Q54XPDJfm5ds5VMJ2ya/PET7qzdNNubvxnLnvzfPGlsiRhyUJSR+BGoAfQBegnqTXwFzPrbGZdgX8Dvw+3bw8MADoAfYHHJRVONjMCGAy0Dm99ExW3cy55erXO4sFLuzB12VbuHDOTAwUV89T+8iiRLYt2wBQz22Nm+cAEoL+Z7YzYpjpQ+Gm4AHjZzPaZ2TJgCdBDUmOglplNtuCikJHAhQmM2zmXRBd0bcpvz23Hu3PX88e35lFRrwUrbxJ5ncVc4D5J9YFvgHOAXABJ9wGDgB3A6eH2TYEpEfuvDsvywvtFy39A0mCCFgjNmzcvreNwzh1hN/Q6hk279vGP/35Fw5pVuPWM1skOqdJLWMvCzBYAw4APgPeAWUB++NxvzKwZ8CJwa7hLceMQVkJ5ca/5pJnlmFlOVlbWYR6Bcy6Zftm3LRcd35QH3/+SMdNXJjucSi+hA9xm9oyZdTOz3sBWYHGRTV4CLg7vrwaaRTyXDawNy7OLKXfOVWApKWLYJZ3pfVwW97w6hw/nb0h2SJVaos+Gahj+bA5cBIwOB7kLnQ8sDO+/CQyQVEVSS4KB7Glmtg7YJemk8CyoQcAbiYzbOVc2pKemMGJgNzo1rc2Qlz5nxoqtyQ6p0kr0dRbjJM0H3gKGmNk24P7wdNrZwI+BOwDMbB4wFphP0G01xMwKz527GXiaYNB7KfBuguN2zpUR1auk8ew1J9CkTlWuez6XJRt3JTukSslnnXXOlQurtu7hohGTSE8R4245hca1fTW+RDjYrLN+BbdzrlxoVq8az197Ajv35nP1s9PYscev8j6SPFk458qNDk1q8+Sg7izfvIcbRk73q7yPIE8Wzrly5ZRWDfjr5V3IXbGN20d/Qf6BgmSHVCn44kfOuXKnX+cmbN61j3vfms/v3pjHnX1ak1dg5B8oIO+AkV9QQP4BI+9AAfkF4c+wPO+Afftc4fOpKeKcTo2pUcX/JR6MvzPOuXLpmp4t2bhrH4+PX8roaYd/0d6jHy/h4cu70r1F3VKIruLxZOGcK7d+flYbOjatzZav95OeItJTU0hLDX9GPE5LSSE9VaSF5Rlp339+2aav+cW42Vz6xCRuPaM1t51xLOmp3ksfyU+ddc45YNfePO59cz7jPl9N12Z1ePjyrrRsUD3ZYR1xfuqsc86VoGZmOg9d1oXHruzGss1fc+7wT3l52kqf9TbkycI55yKc27kx7w3txfHN6/CrV+cw+IUZbNm9L9lhJZ0nC+ecK6Jx7aq8cN2J/PbcdkxYtImz/vYpnyzamOywksqThXPOFSMlRdzQ6xjevK0n9atncO1z0/n9G3P5Zn/lvBDQk4VzzpWgbaNavHFrT64/tSUjJ6/gvEc/Y+6aHckO64jzZOGcc1Fkpqfyu37tGXX9iezam0f/xycyYvzSSrVGuCcL55w7RKe2bsB/hvbmf9ofxbD3FnLFU1NYvW1PssM6IjxZOOdcDOpUy+CxK7vx0KVdmL92J2f/7VNe/2JNssNKOE8WzjkXI0lc3D2bd+/oxXGNajJ0zExufelzZq/eXmGvy/AruJ1z7jDkHyjgiQlL+ftHi8k7YDSrV5VzOjXm3E6N6dS0NsFq0OXHwa7g9mThnHOlYPue/bw/fwPvzFnHZ4s3k19QPhOHJwvnnDtCChPH27PXMXHJ9xNHv05N6Ni0VplNHJ4snHMuCbbv2c/78zbw9pzvEkfzetW+bXGUtcThycI555KsMHH8e846JpXRxOHJwjnnypBtX+/n/fnreXvO+m8TxzENqvPnizpx0jH1kxaXJwvnnCujChPHiPFLWbF1Dzef1oqhfY4jI+3IX92QlPUsJN0haa6keZKGhmV/kbRQ0mxJr0mqE7H9PZKWSFok6ayI8u6S5oTPDVey22nOOVeK6lbP4PITmvP27b24rHszHh+/lEuemMRXm3YnO7RvJSxZSOoI3Aj0ALoA/SS1Bj4AOppZZ+BL4J5w+/bAAKAD0Bd4XFJqWN0IYDDQOrz1TVTczjmXLNWrpDHsks6MGNiNFVv2cO7wz8rMAkyJbFm0A6aY2R4zywcmAP3N7P3wMcAUIDu8fwHwspntM7NlwBKgh6TGQC0zm2zBOzYSuDCBcTvnXFKd3en7CzDdPOpztn29P6kxJTJZzAV6S6ovqRpwDtCsyDbXAe+G95sCqyKeWx2WNQ3vFy3/AUmDJeVKyt20aVMpHIJzziVH49pVGXX9idxzdls+WriBvn//LxOXbE5aPAlLFma2ABhG0O30HjALKGxRIOk34eMXC4uKq6aE8uJe80kzyzGznKysrMOI3jnnki8lRdx0Witeu6Un1aukMfDpqfz5nQXsyz/yCzAldIDbzJ4xs25m1hvYCiwGkHQ10A8YaN91xq3m+y2PbGBtWJ5dTLlzzlUKHZvW5u3bejHwxOY8+d+v6P/YJJZs3HVEY0j02VANw5/NgYuA0ZL6Ar8EzjezyIng3wQGSKoiqSXBQPY0M1sH7JJ0UngW1CDgjUTG7ZxzZU3VjFTu69+JpwblsH7nXs4d/hkvTFlxxAa/E30S7zhJ84G3gCFmtg14FKgJfCBppqQnAMxsHjAWmE/QbTXEzArbWjcDTxMMei/lu3EO55yrVP6n/VG8d0cverSsx+9en8uNI3PZsntfwl/XL8pzzrlyqKDAeH7Scu5/dyG1qqbz4KWd+VGbhoddb1IuynPOOZcYKSniulNb8satPalXPZ1rnpvOH9+ax968xAx+e7JwzrlyrF3jWrx566lcc8rRPDdxORc8OpENO/eW+uuklXqNzjnnjqjM9FTuPb8Dp7XJ4uVpK6lfPaPUX8OThXPOVRCnt2nI6aUwblEc74ZyzjkXlScL55xzUXmycM45F5UnC+ecc1F5snDOOReVJwvnnHNRebJwzjkXlScL55xzUVXYiQQlbQJWxLl7A+Bwl6TyOspWDF6H15HoOspCDKVRRwsz+8HqcRU2WRwOSbnFzbrodZTfGLwOryPRdZSFGEqrjuJ4N5RzzrmoPFk455yLypNF8Z70Okq1jrIQg9fhdSS6jrIQQ2nV8QM+ZuGccy4qb1k455yLypOFc865qDxZRJD0rKSNkubGuX8zSZ9IWiBpnqQ74qgjU9I0SbPCOv4YTyxhXamSvpD07zj3Xy5pjqSZknLjrKOOpH9JWhi+LyfHuH+b8PULbzslDY0jjjvD93OupNGSMuOo445w/3mHGkNxnylJ9SR9IGlx+LNuHHVcGsZRICnqaZIHqeMv4e9ltqTXJNWJo44/hfvPlPS+pCax7B/x3N2STFKDOGK4V9KaiM/IObHWEZbfJmlR+L4+EEccYyJiWC5pZhx1dJU0pfBvTlKPOOroImly+Lf7lqRaJdVxyMzMb+EN6A10A+bGuX9joFt4vybwJdA+xjoE1AjvpwNTgZPijOdnwEvAv+PcfznQ4DDf038CN4T3M4A6h1FXKrCe4KKhWPZrCiwDqoaPxwLXxFhHR2AuUI1ghckPgdbxfKaAB4Bfhfd/BQyLo452QBtgPJATZxw/BtLC+8PijKNWxP3bgSdi2T8sbwb8h+Ai2hI/bweJ4V7g7hh+l8XVcXr4O60SPm4Yax1Fnn8I+H0ccbwPnB3ePwcYH0cd04HTwvvXAX+K5bN+sJu3LCKY2X+BrYex/zoz+zy8vwtYQPCPKpY6zMx2hw/Tw1vMZyFIygbOBZ6Odd/SEn6j6Q08A2Bm+81s+2FUeSaw1MziuTI/DagqKY3gH/7aGPdvB0wxsz1mlg9MAPpH2+kgn6kLCJIo4c8LY63DzBaY2aJDjP1gdbwfHgvAFCA7jjp2RjysTgmf1RL+vh4GflHSvodQxyE7SB03A/eb2b5wm43xxiFJwGXA6DjqMKCwJVCbKJ/Tg9TRBvhveP8D4OKS6jhUniwSRNLRwPEELYNY900Nm7AbgQ/MLOY6gL8R/AEWxLFvIQPelzRD0uA49j8G2AQ8F3aHPS2p+mHEM4Aof4DFMbM1wIPASmAdsMPM3o+xmrlAb0n1JVUj+NbXLNZYQkeZ2bowtnVAYhZNjs11wLvx7CjpPkmrgIHA72Pc93xgjZnNiue1I9wadoc9G61b7yCOA3pJmippgqQTDiOWXsAGM1scx75Dgb+E7+eDwD1x1DEXOD+8fynxf06/x5NFAkiqAYwDhhb55nVIzOyAmXUl+KbXQ1LHGF+/H7DRzGbE+tpF9DSzbsDZwBBJvWPcP42giTzCzI4HvibodomZpAyCP4BX4ti3LsG3+ZZAE6C6pKtiqcPMFhB01XwAvAfMAvJL3KmckPQbgmN5MZ79zew3ZtYs3P/WGF63GvAbYkwwxRgBtAK6EnwZeCiOOtKAusBJwM+BsWELIR5XEMeXmtDNwJ3h+3knYas8RtcR/L3OIOgO3x9nLN/jyaKUSUonSBQvmtmrh1NX2GUzHugb4649gfMlLQdeBs6QNCqO118b/twIvAaUONhWjNXA6oiW0b8Ikkc8zgY+N7MNcezbB1hmZpvMLA94FTgl1krM7Bkz62ZmvQma/vF8cwTYIKkxQPizxC6PRJJ0NdAPGGhhJ/dheInYujxaESTwWeFnNRv4XFKjWF7UzDaEX7AKgKeI/XMKwWf11bAbeBpBi7zEwfbihN2cFwFj4ogB4GqCzycEX4xiPhYzW2hmPzaz7gRJa2mcsXyPJ4tSFH4TeQZYYGZ/jbOOrMKzUiRVJfhHtzCWOszsHjPLNrOjCbpuPjazmL5JS6ouqWbhfYLB0JjOEjOz9cAqSW3CojOB+bHUEeFwvq2tBE6SVC38HZ1JMJ4UE0kNw5/NCf4hxBvPmwT/FAh/vhFnPYdFUl/gl8D5ZrYnzjpaRzw8nxg+q2Y2x8wamtnR4Wd1NcEJIutjjKFxxMP+xPg5Db0OnBHWdxzByRjxzNzaB1hoZqvj2BeCMYrTwvtnEMcXkojPaQrwW+CJOGP5vtIYJa8oN4I//nVAHsEH9/oY9z+VoJ9/NjAzvJ0TYx2dgS/COuYS5YyKQ6jvR8RxNhTBeMOs8DYP+E2cr98VyA2P53Wgbhx1VAO2ALUP4334I8E/srnAC4RnvcRYx6cEyW4WcGa8nymgPvARwT+Cj4B6cdTRP7y/D9gA/CeOOpYAqyI+qwc9k6mEOsaF7+ls4C2gaSz7F3l+OdHPhiouhheAOWEMbwKN46gjAxgVHsvnwBmx1hGWPw/89DA+G6cCM8LP2FSgexx13EFwJuaXwP2EM3Uc7s2n+3DOOReVd0M555yLypOFc865qDxZOOeci8qThXPOuag8WTjnnIvKk4Url8IZSh+KeHy3pHtLqe7nJV1SGnVFeZ1LFczE+0ki45J0tKQrY4/Que94snDl1T7gomhTWh9pklJj2Px64BYzOz1R8YSOBmJKFjEeh6sEPFm48iqfYK3hO4s+UfQbuKTd4c8fhZPEjZX0paT7JQ1UsH7IHEmtIqrpI+nTcLt+4f6pCtaAmB5OWndTRL2fSHqJ4OKwovFcEdY/V9KwsOz3BBdgPSHpL8Xs84twn1mS7i/m+eWFiVJSjqTx4f3T9N2aCl+EV+HfTzBJ3kwF63oc0nGEV/G/HcYwV9Llh/KLcRVTWrIDcO4wPAbMVpSFaoroQjDd+FbgK+BpM+uhYKGq2whm/YTg2/hpBPMXfSLpWGAQwYy1J0iqAkyUVDh7bQ+go5kti3wxBYsBDQO6A9sIZvG90Mz+V9IZBOsw5BbZ52yCactPNLM9kurFcHx3A0PMbGI4oeVegskb7zazwqQ3+FCOQ9LFwFozOzfcr3YMcbgKxlsWrtyyYEbfkQSL7hyq6RasO7KPYIK1wn+ScwgSRKGxZlZgwTTTXwFtCebHGqRg+vipBNN2FM6NNK1oogidQLCAzSYL1o54kWCNj5L0AZ6zcL4mM4tl/YaJwF8l3U6w0FRxM+Me6nHMIWhhDZPUy8x2xBCHq2A8Wbjy7m8Eff+R62TkE362w4kDMyKe2xdxvyDicQHfb2kXnQfHCFYxvM3Muoa3lvbduhhfHyS+eKa5VjGvX9S3xwh8u0Ssmd0P3ABUBaZIanuQ+qMeh5l9SdAimgP8X9h15iopTxauXAu/dY8lSBiFlhP8k4NgHYv0OKq+VFJKOI5xDLCIYOnPmxVMQ4+k4xR9MaepwGmSGoSDxlcQrLJXkveB6xSs98BBuqGW890xfjstuKRWFszmOoxgAse2wC6CdQ0KHdJxhF1oe8xsFMFCPPFOL+8qAB+zcBXBQ3x/0Z2ngDckTSOY1fVg3/pLsojgn/pRBLOI7pX0NEFX1edhi2UT0ZdEXSfpHuATgm/075hZiVOSm9l7kroCuZL2A+8Avy6y2R+BZyT9mu+vxjhU0unAAYIZct8laDXlS5pFMCvq3w/xODoRrNpWQDCr6c0lxe0qNp911jnnXFTeDeWccy4qTxbOOeei8mThnHMuKk8WzjnnovJk4ZxzLipPFs4556LyZOGccy6q/w9kfuSLY2sRXQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 探测最好的聚类数是哪个\n",
    "wcss = []\n",
    "for i in range(1, 20):\n",
    "    kmeans = KMeans(n_clusters=i, n_init=20, n_jobs=4)\n",
    "    kmeans.fit(X3)\n",
    "    wcss.append(kmeans.inertia_)\n",
    "plt.plot(range(1, 20), wcss)\n",
    "plt.title(\"Which is the best n_cluster\")\n",
    "plt.xlabel(\"Number of clusters\")\n",
    "plt.ylabel(\"WCSS\")\n",
    "plt.xticks(range(1, 20))\n",
    "plt.show()\n",
    "\n",
    "# 可以看到当 n_clusters 在 9 以上时，线开始变得平缓了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
